diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index c0ed4c412b..3358392261 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index fece25c518..200ddc9d0b 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/llms/anthropic.py b/litellm/llms/anthropic.py
index 5e755e34c3..5b5d928b20 100644
--- a/litellm/llms/anthropic.py
+++ b/litellm/llms/anthropic.py
@@ -4,13 +4,20 @@ import requests
 from litellm import logging
 import time
 from typing import Callable
+
 class AnthropicConstants(Enum):
     HUMAN_PROMPT = "\n\nHuman:"
     AI_PROMPT = "\n\nAssistant:"

+class AnthropicError(Exception):
+    def __init__(self, status_code, message):
+        self.status_code = status_code
+        self.message = message
+
 class AnthropicLLM:
-    def __init__(self, default_max_tokens_to_sample, api_key=None):
+    def __init__(self, encoding, default_max_tokens_to_sample, api_key=None):
+        self.encoding = encoding
         self.default_max_tokens_to_sample = default_max_tokens_to_sample
         self.completion_url = "https://api.anthropic.com/v1/complete"
         self.validate_environment(api_key=api_key)
@@ -33,9 +40,6 @@ class AnthropicLLM:
             raise ValueError("Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params")
         pass

-    def _stream(self): # logic for handling streaming with the LLM API
-        pass
-
     def completion(self, model: str, messages: list, model_response: dict, print_verbose: Callable, optional_params=None, litellm_params=None, logger_fn=None): # logic for parsing in - calling - parsing out model completion calls
         model = model
         prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}"
@@ -73,12 +77,13 @@ class AnthropicLLM:
             completion_response = response.json()
             print(f"completion_response: {completion_response}")
             if "error" in completion_response:
-                raise Exception(completion_response["error"])
+                raise AnthropicError(message=completion_response["error"], status_code=response.status_code)
             else:
                 model_response["choices"][0]["message"]["content"] = completion_response["completion"]
+            ## CALCULATING USAGE
-            prompt_tokens = 0
-            completion_tokens = 0
+            prompt_tokens = len(self.encoding.encode(prompt)) ##[TODO] use the anthropic tokenizer here
+            completion_tokens = len(self.encoding.encode(model_response["choices"][0]["message"]["content"])) ##[TODO] use the anthropic tokenizer here

             model_response["created"] = time.time()
@@ -91,7 +96,4 @@ class AnthropicLLM:
         return model_response

     def embedding(): # logic for parsing in - calling - parsing out model embedding calls
-        pass
-
-    def stream(): # logic for how to parse in-out model completion streams
-        pass
\ No newline at end of file
+        pass
\ No newline at end of file
diff --git a/litellm/llms/base.py b/litellm/llms/base.py
new file mode 100644
index 0000000000..368df96245
--- /dev/null
+++ b/litellm/llms/base.py
@@ -0,0 +1,11 @@
+## This is a template base class to be used for adding new LLM providers via API calls
+
+class BaseLLM():
+    def validate_environment(): # set up the environment required to run the model
+        pass
+
+    def completion(): # logic for parsing in - calling - parsing out model completion calls
+        pass
+
+    def embedding(): # logic for parsing in - calling - parsing out model embedding calls
+        pass
\ No newline at end of file
diff --git a/litellm/main.py b/litellm/main.py
index bead3e1d86..9ee8beaefb 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -208,7 +208,7 @@ def completion(
         response = model_response
     elif model in litellm.anthropic_models:
         anthropic_key = api_key if api_key is not None else litellm.anthropic_key
-        anthropic_client = AnthropicLLM(default_max_tokens_to_sample=litellm.max_tokens, api_key=anthropic_key)
+        anthropic_client = AnthropicLLM(encoding=encoding, default_max_tokens_to_sample=litellm.max_tokens, api_key=anthropic_key)
         model_response = anthropic_client.completion(model=model, messages=messages, model_response=model_response, print_verbose=print_verbose, optional_params=optional_params, litellm_params=litellm_params, logger_fn=logger_fn)
         if 'stream' in optional_params and optional_params['stream'] == True: # don't try to access stream object,
diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py
index 16f7ae65d4..e224fdf7a6 100644
--- a/litellm/tests/test_exceptions.py
+++ b/litellm/tests/test_exceptions.py
@@ -21,7 +21,8 @@ litellm.failure_callback = ["sentry"]
 # Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered

 # models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly"]
-models = ["command-nightly"]
+test_model = "claude-instant-1"
+models = ["claude-instant-1"]

 def logging_fn(model_call_dict):
     if "model" in model_call_dict:
         print(f"model_call_dict: {model_call_dict['model']}")
@@ -35,7 +36,7 @@ def test_context_window(model):
     sample_text = "how does a court case get to the Supreme Court?" * 5000
     messages = [{"content": sample_text, "role": "user"}]
     try:
-        azure = model == "chatgpt-test"
+        model = "chatgpt-test"
         print(f"model: {model}")
         response = completion(model=model, messages=messages, custom_llm_provider="azure", logger_fn=logging_fn)
         print(f"response: {response}")
@@ -51,7 +52,7 @@ def test_context_window(model):
         print(f"Uncaught Exception - {e}")
         pytest.fail(f"Error occurred: {e}")
     return
-test_context_window("command-nightly")
+test_context_window(test_model)

 # Test 2: InvalidAuth Errors
 @pytest.mark.parametrize("model", models)
@@ -101,7 +102,7 @@ def invalid_auth(model): # set the model key to an invalid key, depending on the
     elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
         os.environ["REPLICATE_API_KEY"] = temporary_key
     return
-invalid_auth("command-nightly")
+invalid_auth(test_model)
 # # Test 3: Rate Limit Errors
 # def test_model(model):
 #     try:
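
Reviewer note (not part of the diff): the sketch below illustrates the two behaviors this change introduces in AnthropicLLM.completion -- wrapping provider error payloads in AnthropicError so the HTTP status code travels with the message, and approximating prompt/completion token counts with the encoding object now injected from litellm/main.py. It is a minimal, self-contained approximation, assuming a tiktoken cl100k_base encoding as a stand-in for Anthropic's own tokenizer (the diff itself marks the tokenizer choice as a TODO); the strings used are made up for illustration.

```python
# Illustrative sketch only -- mirrors the pattern added in this diff, not litellm's exact code.
import tiktoken


class AnthropicError(Exception):
    """Carries the HTTP status code alongside the provider's error message."""
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        super().__init__(message)


# Token accounting as done in AnthropicLLM.completion(): count tokens with the
# encoding passed in from main.py (here tiktoken's cl100k_base as a placeholder).
encoding = tiktoken.get_encoding("cl100k_base")
prompt = "\n\nHuman: how does a court case get to the Supreme Court?\n\nAssistant:"
completion_text = "A case usually reaches the Supreme Court on appeal ..."  # hypothetical output

prompt_tokens = len(encoding.encode(prompt))
completion_tokens = len(encoding.encode(completion_text))
print(prompt_tokens, completion_tokens, prompt_tokens + completion_tokens)

# Error path: an error payload from the API is surfaced with its status code,
# so callers can branch on e.status_code instead of parsing a bare Exception.
try:
    raise AnthropicError(status_code=401, message="invalid x-api-key")  # simulated failure
except AnthropicError as e:
    print(f"Anthropic call failed ({e.status_code}): {e.message}")
```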