added exception mapping to anthropic class

2025-04-24 18:24:20 +00:00 · 2023-08-12 17:39:11 -07:00 · 2023-08-12 17:39:11 -07:00 · a4cf7e1edd
commit a4cf7e1edd
parent 7490669218
6 changed files with 30 additions and 16 deletions
--- a/litellm/pycache/main.cpython-311.pyc
+++ b/litellm/pycache/main.cpython-311.pyc
--- a/litellm/pycache/utils.cpython-311.pyc
+++ b/litellm/pycache/utils.cpython-311.pyc
--- a/litellm/llms/anthropic.py
+++ b/litellm/llms/anthropic.py
@ -4,13 +4,20 @@ import requests
 from litellm import logging
 import time 
 from typing import Callable
+
 class AnthropicConstants(Enum):
    HUMAN_PROMPT = "\n\nHuman:"
    AI_PROMPT = "\n\nAssistant:"

+class AnthropicError(Exception):
+    """Error raised when an Anthropic API response contains an "error" entry.
+
+    Attributes:
+        status_code: Status code supplied by the raiser (the HTTP status of
+            the response when raised from `AnthropicLLM.completion`).
+        message: Error payload supplied by the raiser.
+    """
+
+    def __init__(self, status_code, message):
+        self.status_code = status_code
+        self.message = message
+        # Forward the message to Exception so str(exc), exc.args and
+        # tracebacks carry it instead of being empty.
+        super().__init__(self.message)
+
 class AnthropicLLM: 
    
-    def __init__(self, default_max_tokens_to_sample, api_key=None):
+    def __init__(self, encoding, default_max_tokens_to_sample, api_key=None):
+        self.encoding = encoding
        self.default_max_tokens_to_sample = default_max_tokens_to_sample
        self.completion_url = "https://api.anthropic.com/v1/complete"
        self.validate_environment(api_key=api_key)
@ -33,9 +40,6 @@ class AnthropicLLM:
            raise ValueError("Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params")
        pass  

-    def _stream(self): # logic for handling streaming with the LLM API 
-        pass
-
    def completion(self, model: str, messages: list, model_response: dict, print_verbose: Callable, optional_params=None, litellm_params=None, logger_fn=None): # logic for parsing in - calling - parsing out model completion calls
        model = model
        prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}"
@ -73,12 +77,13 @@ class AnthropicLLM:
            completion_response = response.json()
            print(f"completion_response: {completion_response}")
            if "error" in completion_response:
-                raise Exception(completion_response["error"])
+                raise AnthropicError(message=completion_response["error"], status_code=response.status_code)
            else:
                model_response["choices"][0]["message"]["content"] = completion_response["completion"]    
+            
            ## CALCULATING USAGE
-            prompt_tokens = 0
-            completion_tokens = 0
+            prompt_tokens = len(self.encoding.encode(prompt)) ##[TODO] use the anthropic tokenizer here
+            completion_tokens = len(self.encoding.encode(model_response["choices"][0]["message"]["content"])) ##[TODO] use the anthropic tokenizer here
            
            
            model_response["created"] = time.time()
@ -91,7 +96,4 @@ class AnthropicLLM:
            return model_response
    
    def embedding(): # logic for parsing in - calling - parsing out model embedding calls
-        pass 
-    
-    def stream(): # logic for how to parse in-out model completion streams
-        pass
+        pass 
--- a/litellm/llms/base.py
+++ b/litellm/llms/base.py
@ -0,0 +1,11 @@
+## This is a template base class to be used for adding new LLM providers via API calls
+
+class BaseLLM():
+    """Template of the methods a new LLM provider class is expected to define.
+
+    Every method is a no-op placeholder. Each one takes `self` so it can be
+    called on an instance and copied into a provider class unchanged.
+    """
+
+    def validate_environment(self):  # set up the environment required to run the model
+        pass
+
+    def completion(self):  # logic for parsing in - calling - parsing out model completion calls
+        pass
+
+    def embedding(self):  # logic for parsing in - calling - parsing out model embedding calls
+        pass
--- a/litellm/main.py
+++ b/litellm/main.py
@ -208,7 +208,7 @@ def completion(
      response = model_response
    elif model in litellm.anthropic_models:
      anthropic_key = api_key if api_key is not None else litellm.anthropic_key
-      anthropic_client = AnthropicLLM(default_max_tokens_to_sample=litellm.max_tokens, api_key=anthropic_key)
+      anthropic_client = AnthropicLLM(encoding=encoding, default_max_tokens_to_sample=litellm.max_tokens, api_key=anthropic_key)
      model_response = anthropic_client.completion(model=model, messages=messages, model_response=model_response, print_verbose=print_verbose, optional_params=optional_params, litellm_params=litellm_params, logger_fn=logger_fn)
      if 'stream' in optional_params and optional_params['stream'] == True:
        # don't try to access stream object,
--- a/litellm/tests/test_exceptions.py
+++ b/litellm/tests/test_exceptions.py
@ -21,7 +21,8 @@ litellm.failure_callback = ["sentry"]
 # Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered

 # models = ["gpt-3.5-turbo", "chatgpt-test",  "claude-instant-1", "command-nightly"]
-models = ["command-nightly"]
+test_model = "claude-instant-1"
+models = ["claude-instant-1"]
 def logging_fn(model_call_dict):
    if "model" in model_call_dict: 
        print(f"model_call_dict: {model_call_dict['model']}")
@ -35,7 +36,7 @@ def test_context_window(model):
    sample_text = "how does a court case get to the Supreme Court?" * 5000
    messages = [{"content": sample_text, "role": "user"}]
    try:
-        azure = model == "chatgpt-test"
+        model = "chatgpt-test"
        print(f"model: {model}")
        response = completion(model=model, messages=messages, custom_llm_provider="azure", logger_fn=logging_fn)
        print(f"response: {response}")
@ -51,7 +52,7 @@ def test_context_window(model):
        print(f"Uncaught Exception - {e}")
        pytest.fail(f"Error occurred: {e}")
    return
-test_context_window("command-nightly")
+test_context_window(test_model)

 # Test 2: InvalidAuth Errors
@pytest.mark.parametrize("model", models)
@ -101,7 +102,7 @@ def invalid_auth(model): # set the model key to an invalid key, depending on the
        elif model == "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1":
            os.environ["REPLICATE_API_KEY"] = temporary_key
    return
-invalid_auth("command-nightly")
+invalid_auth(test_model)
 # # Test 3: Rate Limit Errors 
 # def test_model(model):
 #     try: