Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 10:44:24 +00:00)
Commit 73bb1b96e9: update exception mapping and get model cost map
Parent: e03d442e8f
9 changed files with 54 additions and 96 deletions
@@ -1,4 +1,4 @@
-import threading
+import threading, requests
 from typing import Callable, List, Optional, Dict
 from litellm.caching import Cache
@@ -35,94 +35,18 @@ caching = False # deprecated son
 caching_with_models = False # if you want the caching key to be model + prompt # deprecated soon
 cache: Optional[Cache] = None # cache object
 model_alias_map: Dict[str, str] = {}
-model_cost = {
-    "babbage-002": {
-        "max_tokens": 16384,
-        "input_cost_per_token": 0.0000004,
-        "output_cost_per_token": 0.0000004,
-    },
-    "davinci-002": {
-        "max_tokens": 16384,
-        "input_cost_per_token": 0.000002,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-3.5-turbo": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-35-turbo": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    },  # azure model name
-    "gpt-3.5-turbo-0613": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-3.5-turbo-0301": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-3.5-turbo-16k": {
-        "max_tokens": 16000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000004,
-    },
-    "gpt-35-turbo-16k": {
-        "max_tokens": 16000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000004,
-    },  # azure model name
-    "gpt-3.5-turbo-16k-0613": {
-        "max_tokens": 16000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000004,
-    },
-    "gpt-4": {
-        "max_tokens": 8000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.00006,
-    },
-    "gpt-4-0613": {
-        "max_tokens": 8000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.00006,
-    },
-    "gpt-4-32k": {
-        "max_tokens": 8000,
-        "input_cost_per_token": 0.00006,
-        "output_cost_per_token": 0.00012,
-    },
-    "claude-instant-1": {
-        "max_tokens": 100000,
-        "input_cost_per_token": 0.00000163,
-        "output_cost_per_token": 0.00000551,
-    },
-    "claude-2": {
-        "max_tokens": 100000,
-        "input_cost_per_token": 0.00001102,
-        "output_cost_per_token": 0.00003268,
-    },
-    "text-bison-001": {
-        "max_tokens": 8192,
-        "input_cost_per_token": 0.000004,
-        "output_cost_per_token": 0.000004,
-    },
-    "chat-bison-001": {
-        "max_tokens": 4096,
-        "input_cost_per_token": 0.000002,
-        "output_cost_per_token": 0.000002,
-    },
-    "command-nightly": {
-        "max_tokens": 4096,
-        "input_cost_per_token": 0.000015,
-        "output_cost_per_token": 0.000015,
-    },
-}
+def get_model_cost_map():
+    url = "https://raw.githubusercontent.com/BerriAI/litellm/main/cookbook/community-resources/max_tokens.json"
+
+    try:
+        response = requests.get(url)
+        response.raise_for_status()  # Raise an exception if request is unsuccessful
+        content = response.json()
+        return content
+    except requests.exceptions.RequestException as e:
+        print("Error occurred:", e)
+        return None
+model_cost = get_model_cost_map()

 ####### THREAD-SPECIFIC DATA ###################
 class MyLocal(threading.local):
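For context, a minimal sketch (not part of this commit) of how the remotely fetched cost map might be consumed. The URL and the per-model fields (max_tokens, input_cost_per_token, output_cost_per_token) come from the diff above; the helper name load_model_cost, the timeout, and the empty-dict fallback are assumptions, since get_model_cost_map() itself returns None when the request fails.

    import requests

    COST_MAP_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/cookbook/community-resources/max_tokens.json"

    def load_model_cost(url: str = COST_MAP_URL) -> dict:
        # Hypothetical wrapper around the same fetch get_model_cost_map() performs;
        # falls back to an empty dict (an assumption) instead of None, so later
        # lookups raise KeyError rather than TypeError.
        try:
            response = requests.get(url, timeout=5)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException:
            return {}

    cost_map = load_model_cost()
    entry = cost_map.get("gpt-3.5-turbo", {})
    print(entry.get("max_tokens"), entry.get("input_cost_per_token"))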
@@ -298,7 +222,8 @@ from .utils import (
     Logging,
     acreate,
     get_model_list,
-    completion_with_split_tests
+    completion_with_split_tests,
+    get_max_tokens
 )
 from .main import *  # type: ignore
 from .integrations import *
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -9,10 +9,11 @@ from openai.error import (

 class AuthenticationError(AuthenticationError):  # type: ignore
-    def __init__(self, message, llm_provider):
+    def __init__(self, message, llm_provider, model):
         self.status_code = 401
         self.message = message
         self.llm_provider = llm_provider
+        self.model = model
         super().__init__(
             self.message
         )  # Call the base class constructor with the parameters it needs
@@ -41,20 +42,22 @@ class ContextWindowExceededError(InvalidRequestError):  # type: ignore

 class RateLimitError(RateLimitError):  # type: ignore
-    def __init__(self, message, llm_provider):
+    def __init__(self, message, llm_provider, model):
         self.status_code = 429
         self.message = message
         self.llm_provider = llm_provider
+        self.model = model
         super().__init__(
             self.message
         )  # Call the base class constructor with the parameters it needs


 class ServiceUnavailableError(ServiceUnavailableError):  # type: ignore
-    def __init__(self, message, llm_provider):
+    def __init__(self, message, llm_provider, model):
         self.status_code = 500
         self.message = message
         self.llm_provider = llm_provider
+        self.model = model
         super().__init__(
             self.message
         )  # Call the base class constructor with the parameters it needs
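A short usage sketch for the extended exception classes; the completion() call and message content are illustrative and how the error is triggered is left out, but the attributes read below (status_code, llm_provider, and the new model) are the ones set in the diff above.

    import litellm
    from litellm.exceptions import AuthenticationError, RateLimitError

    try:
        litellm.completion(
            model="claude-2",
            messages=[{"role": "user", "content": "hello"}],
        )
    except AuthenticationError as e:
        # New in this commit: the mapped exception now carries the offending model.
        print(e.status_code, e.llm_provider, e.model)  # 401, provider name, model
    except RateLimitError as e:
        print(e.status_code, e.model)  # 429 plus the model that hit the limit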
@@ -22,7 +22,7 @@ import pytest
 litellm.vertex_project = "pathrise-convert-1606954137718"
 litellm.vertex_location = "us-central1"

-litellm.failure_callback = ["sentry"]
+# litellm.failure_callback = ["sentry"]
 #### What this tests ####
 # This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type
litellm/tests/test_get_model_cost_map.py (new file, +10 lines)
@@ -0,0 +1,10 @@
+import sys, os
+import traceback
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import time
+from litellm import get_max_tokens
+
+print(get_max_tokens("gpt-3.5-turbo"))
@@ -846,6 +846,13 @@ def get_optional_params(  # use the openai defaults
     return optional_params


+def get_max_tokens(model: str):
+    try:
+        return litellm.model_cost[model]
+    except:
+        raise Exception("This model isn't mapped yet. Add it here - https://raw.githubusercontent.com/BerriAI/litellm/main/cookbook/community-resources/max_tokens.json")
+
+
 def load_test_model(
     model: str,
     custom_llm_provider: str = "",
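A small usage sketch for the new get_max_tokens helper: per the diff it returns the full cost-map entry for a model, so the field names below follow the map shown earlier; the token counts and the cost arithmetic are purely illustrative.

    from litellm import get_max_tokens

    info = get_max_tokens("gpt-3.5-turbo")  # returns the whole cost-map entry
    print(info["max_tokens"])  # e.g. 4000

    # Illustrative cost estimate using the per-token prices from the map.
    prompt_tokens, completion_tokens = 1200, 300
    estimated_cost = (
        prompt_tokens * info["input_cost_per_token"]
        + completion_tokens * info["output_cost_per_token"]
    )
    print(f"estimated cost: ${estimated_cost:.6f}")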
@@ -1458,6 +1465,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise AuthenticationError(
                     message=f"AnthropicException - {original_exception.message}",
                     llm_provider="anthropic",
+                    model=model
                 )
             elif original_exception.status_code == 400:
                 exception_mapping_worked = True

@@ -1478,6 +1486,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise RateLimitError(
                     message=f"AnthropicException - {original_exception.message}",
                     llm_provider="anthropic",
+                    model=model
                 )
             elif (
                 "Could not resolve authentication method. Expected either api_key or auth_token to be set."

@@ -1487,6 +1496,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise AuthenticationError(
                     message=f"AnthropicException - {original_exception.message}",
                     llm_provider="anthropic",
+                    model=model
                 )
         elif "replicate" in model:
             if "Incorrect authentication token" in error_str:

@@ -1494,6 +1504,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise AuthenticationError(
                     message=f"ReplicateException - {error_str}",
                     llm_provider="replicate",
+                    model=model
                 )
             elif "input is too long" in error_str:
                 exception_mapping_worked = True

@@ -1514,6 +1525,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise RateLimitError(
                     message=f"ReplicateException - {error_str}",
                     llm_provider="replicate",
+                    model=model
                 )
             elif (
                 exception_type == "ReplicateError"

@@ -1521,6 +1533,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise ServiceUnavailableError(
                     message=f"ReplicateException - {error_str}",
                     llm_provider="replicate",
+                    model=model
                 )
         elif model in litellm.cohere_models:  # Cohere
             if (

@@ -1531,6 +1544,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise AuthenticationError(
                     message=f"CohereException - {original_exception.message}",
                     llm_provider="cohere",
+                    model=model
                 )
             elif "too many tokens" in error_str:
                 exception_mapping_worked = True

@@ -1546,6 +1560,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise RateLimitError(
                     message=f"CohereException - {original_exception.message}",
                     llm_provider="cohere",
+                    model=model
                 )
         elif custom_llm_provider == "huggingface":
             if "length limit exceeded" in error_str:

@@ -1561,6 +1576,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise AuthenticationError(
                     message=f"HuggingfaceException - {original_exception.message}",
                     llm_provider="huggingface",
+                    model=model
                 )
             elif original_exception.status_code == 400:
                 exception_mapping_worked = True

@@ -1574,6 +1590,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise RateLimitError(
                     message=f"HuggingfaceException - {original_exception.message}",
                     llm_provider="huggingface",
+                    model=model
                 )
         elif custom_llm_provider == "ai21":
             if hasattr(original_exception, "message"):

@@ -1590,6 +1607,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise AuthenticationError(
                     message=f"AI21Exception - {original_exception.message}",
                     llm_provider="ai21",
+                    model=model
                 )
             if original_exception.status_code == 422:
                 exception_mapping_worked = True

@@ -1617,7 +1635,8 @@ def exception_type(model, original_exception, custom_llm_provider):
                 exception_mapping_worked = True
                 raise AuthenticationError(
                     message=f"TogetherAIException - {error_response['error']}",
-                    llm_provider="together_ai"
+                    llm_provider="together_ai",
+                    model=model
                 )
             elif "error" in error_response and "INVALID_ARGUMENT" in error_response["error"]:
                 exception_mapping_worked = True

@@ -1638,6 +1657,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise RateLimitError(
                     message=f"TogetherAIException - {original_exception.message}",
                     llm_provider="together_ai",
+                    model=model
                 )
             raise original_exception  # base case - return the original exception
         else:
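One way the model attribute added throughout exception_type can pay off, sketched under assumptions: the fallback table and retry flow below are illustrative and not part of this commit; only the exception types and their llm_provider/model fields come from the diff.

    import litellm
    from litellm.exceptions import RateLimitError, ServiceUnavailableError

    # Hypothetical fallback table keyed by the model reported on the exception.
    FALLBACKS = {"gpt-4": "gpt-3.5-turbo-16k", "claude-2": "claude-instant-1"}

    def completion_with_fallback(model, messages):
        try:
            return litellm.completion(model=model, messages=messages)
        except (RateLimitError, ServiceUnavailableError) as e:
            fallback = FALLBACKS.get(e.model)  # e.model identifies the throttled model
            if fallback is None:
                raise
            return litellm.completion(model=fallback, messages=messages)

    # completion_with_fallback("gpt-4", [{"role": "user", "content": "hi"}])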
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.525"
+version = "0.1.526"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"