mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
added exception mapping to anthropic class
This commit is contained in:
parent
7490669218
commit
a4cf7e1edd
6 changed files with 30 additions and 16 deletions
Binary file not shown.
Binary file not shown.
|
@ -4,13 +4,20 @@ import requests
|
||||||
from litellm import logging
|
from litellm import logging
|
||||||
import time
|
import time
|
||||||
from typing import Callable
|
from typing import Callable
|
||||||
|
|
||||||
class AnthropicConstants(Enum):
|
class AnthropicConstants(Enum):
|
||||||
HUMAN_PROMPT = "\n\nHuman:"
|
HUMAN_PROMPT = "\n\nHuman:"
|
||||||
AI_PROMPT = "\n\nAssistant:"
|
AI_PROMPT = "\n\nAssistant:"
|
||||||
|
|
||||||
|
class AnthropicError(Exception):
|
||||||
|
def __init__(self, status_code, message):
|
||||||
|
self.status_code = status_code
|
||||||
|
self.message = message
|
||||||
|
|
||||||
class AnthropicLLM:
|
class AnthropicLLM:
|
||||||
|
|
||||||
def __init__(self, default_max_tokens_to_sample, api_key=None):
|
def __init__(self, encoding, default_max_tokens_to_sample, api_key=None):
|
||||||
|
self.encoding = encoding
|
||||||
self.default_max_tokens_to_sample = default_max_tokens_to_sample
|
self.default_max_tokens_to_sample = default_max_tokens_to_sample
|
||||||
self.completion_url = "https://api.anthropic.com/v1/complete"
|
self.completion_url = "https://api.anthropic.com/v1/complete"
|
||||||
self.validate_environment(api_key=api_key)
|
self.validate_environment(api_key=api_key)
|
||||||
|
@ -33,9 +40,6 @@ class AnthropicLLM:
|
||||||
raise ValueError("Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params")
|
raise ValueError("Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params")
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def _stream(self): # logic for handling streaming with the LLM API
|
|
||||||
pass
|
|
||||||
|
|
||||||
def completion(self, model: str, messages: list, model_response: dict, print_verbose: Callable, optional_params=None, litellm_params=None, logger_fn=None): # logic for parsing in - calling - parsing out model completion calls
|
def completion(self, model: str, messages: list, model_response: dict, print_verbose: Callable, optional_params=None, litellm_params=None, logger_fn=None): # logic for parsing in - calling - parsing out model completion calls
|
||||||
model = model
|
model = model
|
||||||
prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}"
|
prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}"
|
||||||
|
@ -73,12 +77,13 @@ class AnthropicLLM:
|
||||||
completion_response = response.json()
|
completion_response = response.json()
|
||||||
print(f"completion_response: {completion_response}")
|
print(f"completion_response: {completion_response}")
|
||||||
if "error" in completion_response:
|
if "error" in completion_response:
|
||||||
raise Exception(completion_response["error"])
|
raise AnthropicError(message=completion_response["error"], status_code=response.status_code)
|
||||||
else:
|
else:
|
||||||
model_response["choices"][0]["message"]["content"] = completion_response["completion"]
|
model_response["choices"][0]["message"]["content"] = completion_response["completion"]
|
||||||
|
|
||||||
## CALCULATING USAGE
|
## CALCULATING USAGE
|
||||||
prompt_tokens = 0
|
prompt_tokens = len(self.encoding.encode(prompt)) ##[TODO] use the anthropic tokenizer here
|
||||||
completion_tokens = 0
|
completion_tokens = len(self.encoding.encode(model_response["choices"][0]["message"]["content"])) ##[TODO] use the anthropic tokenizer here
|
||||||
|
|
||||||
|
|
||||||
model_response["created"] = time.time()
|
model_response["created"] = time.time()
|
||||||
|
@ -91,7 +96,4 @@ class AnthropicLLM:
|
||||||
return model_response
|
return model_response
|
||||||
|
|
||||||
def embedding(): # logic for parsing in - calling - parsing out model embedding calls
|
def embedding(): # logic for parsing in - calling - parsing out model embedding calls
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def stream(): # logic for how to parse in-out model completion streams
|
|
||||||
pass
|
|
11
litellm/llms/base.py
Normal file
11
litellm/llms/base.py
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
## This is a template base class to be used for adding new LLM providers via API calls
|
||||||
|
|
||||||
|
class BaseLLM():
|
||||||
|
def validate_environment(): # set up the environment required to run the model
|
||||||
|
pass
|
||||||
|
|
||||||
|
def completion(): # logic for parsing in - calling - parsing out model completion calls
|
||||||
|
pass
|
||||||
|
|
||||||
|
def embedding(): # logic for parsing in - calling - parsing out model embedding calls
|
||||||
|
pass
|
|
@ -208,7 +208,7 @@ def completion(
|
||||||
response = model_response
|
response = model_response
|
||||||
elif model in litellm.anthropic_models:
|
elif model in litellm.anthropic_models:
|
||||||
anthropic_key = api_key if api_key is not None else litellm.anthropic_key
|
anthropic_key = api_key if api_key is not None else litellm.anthropic_key
|
||||||
anthropic_client = AnthropicLLM(default_max_tokens_to_sample=litellm.max_tokens, api_key=anthropic_key)
|
anthropic_client = AnthropicLLM(encoding=encoding, default_max_tokens_to_sample=litellm.max_tokens, api_key=anthropic_key)
|
||||||
model_response = anthropic_client.completion(model=model, messages=messages, model_response=model_response, print_verbose=print_verbose, optional_params=optional_params, litellm_params=litellm_params, logger_fn=logger_fn)
|
model_response = anthropic_client.completion(model=model, messages=messages, model_response=model_response, print_verbose=print_verbose, optional_params=optional_params, litellm_params=litellm_params, logger_fn=logger_fn)
|
||||||
if 'stream' in optional_params and optional_params['stream'] == True:
|
if 'stream' in optional_params and optional_params['stream'] == True:
|
||||||
# don't try to access stream object,
|
# don't try to access stream object,
|
||||||
|
|
|
@ -21,7 +21,8 @@ litellm.failure_callback = ["sentry"]
|
||||||
# Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered
|
# Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered
|
||||||
|
|
||||||
# models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly"]
|
# models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly"]
|
||||||
models = ["command-nightly"]
|
test_model = "claude-instant-1"
|
||||||
|
models = ["claude-instant-1"]
|
||||||
def logging_fn(model_call_dict):
|
def logging_fn(model_call_dict):
|
||||||
if "model" in model_call_dict:
|
if "model" in model_call_dict:
|
||||||
print(f"model_call_dict: {model_call_dict['model']}")
|
print(f"model_call_dict: {model_call_dict['model']}")
|
||||||
|
@ -35,7 +36,7 @@ def test_context_window(model):
|
||||||
sample_text = "how does a court case get to the Supreme Court?" * 5000
|
sample_text = "how does a court case get to the Supreme Court?" * 5000
|
||||||
messages = [{"content": sample_text, "role": "user"}]
|
messages = [{"content": sample_text, "role": "user"}]
|
||||||
try:
|
try:
|
||||||
azure = model == "chatgpt-test"
|
model = "chatgpt-test"
|
||||||
print(f"model: {model}")
|
print(f"model: {model}")
|
||||||
response = completion(model=model, messages=messages, custom_llm_provider="azure", logger_fn=logging_fn)
|
response = completion(model=model, messages=messages, custom_llm_provider="azure", logger_fn=logging_fn)
|
||||||
print(f"response: {response}")
|
print(f"response: {response}")
|
||||||
|
@ -51,7 +52,7 @@ def test_context_window(model):
|
||||||
print(f"Uncaught Exception - {e}")
|
print(f"Uncaught Exception - {e}")
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
return
|
return
|
||||||
test_context_window("command-nightly")
|
test_context_window(test_model)
|
||||||
|
|
||||||
# Test 2: InvalidAuth Errors
|
# Test 2: InvalidAuth Errors
|
||||||
@pytest.mark.parametrize("model", models)
|
@pytest.mark.parametrize("model", models)
|
||||||
|
@ -101,7 +102,7 @@ def invalid_auth(model): # set the model key to an invalid key, depending on the
|
||||||
elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
|
elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
|
||||||
os.environ["REPLICATE_API_KEY"] = temporary_key
|
os.environ["REPLICATE_API_KEY"] = temporary_key
|
||||||
return
|
return
|
||||||
invalid_auth("command-nightly")
|
invalid_auth(test_model)
|
||||||
# # Test 3: Rate Limit Errors
|
# # Test 3: Rate Limit Errors
|
||||||
# def test_model(model):
|
# def test_model(model):
|
||||||
# try:
|
# try:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue