diff --git a/litellm/__init__.py b/litellm/__init__.py
index b5c84bab4..4c89c441f 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -1,4 +1,4 @@
-import threading
+import threading, requests
 from typing import Callable, List, Optional, Dict
 from litellm.caching import Cache
@@ -35,94 +35,18 @@ caching = False # deprecated son
 caching_with_models = False # if you want the caching key to be model + prompt # deprecated soon
 cache: Optional[Cache] = None # cache object
 model_alias_map: Dict[str, str] = {}
-model_cost = {
-    "babbage-002": {
-        "max_tokens": 16384,
-        "input_cost_per_token": 0.0000004,
-        "output_cost_per_token": 0.0000004,
-    },
-    "davinci-002": {
-        "max_tokens": 16384,
-        "input_cost_per_token": 0.000002,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-3.5-turbo": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-35-turbo": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    },  # azure model name
-    "gpt-3.5-turbo-0613": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-3.5-turbo-0301": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-3.5-turbo-16k": {
-        "max_tokens": 16000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000004,
-    },
-    "gpt-35-turbo-16k": {
-        "max_tokens": 16000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000004,
-    },  # azure model name
-    "gpt-3.5-turbo-16k-0613": {
-        "max_tokens": 16000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000004,
-    },
-    "gpt-4": {
-        "max_tokens": 8000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.00006,
-    },
-    "gpt-4-0613": {
-        "max_tokens": 8000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.00006,
-    },
-    "gpt-4-32k": {
-        "max_tokens": 8000,
-        "input_cost_per_token": 0.00006,
-        "output_cost_per_token": 0.00012,
-    },
-    "claude-instant-1": {
-        "max_tokens": 100000,
-        "input_cost_per_token": 0.00000163,
-        "output_cost_per_token": 0.00000551,
-    },
-    "claude-2": {
-        "max_tokens": 100000,
-        "input_cost_per_token": 0.00001102,
-        "output_cost_per_token": 0.00003268,
-    },
-    "text-bison-001": {
-        "max_tokens": 8192,
-        "input_cost_per_token": 0.000004,
-        "output_cost_per_token": 0.000004,
-    },
-    "chat-bison-001": {
-        "max_tokens": 4096,
-        "input_cost_per_token": 0.000002,
-        "output_cost_per_token": 0.000002,
-    },
-    "command-nightly": {
-        "max_tokens": 4096,
-        "input_cost_per_token": 0.000015,
-        "output_cost_per_token": 0.000015,
-    },
-}
-
+def get_model_cost_map():
+    url = "https://raw.githubusercontent.com/BerriAI/litellm/main/cookbook/community-resources/max_tokens.json"
+
+    try:
+        response = requests.get(url)
+        response.raise_for_status()  # Raise an exception if request is unsuccessful
+        content = response.json()
+        return content
+    except requests.exceptions.RequestException as e:
+        print("Error occurred:", e)
+        return None
+model_cost = get_model_cost_map()

 ####### THREAD-SPECIFIC DATA ###################
 class MyLocal(threading.local):
@@ -298,7 +222,8 @@ from .utils import (
     Logging,
     acreate,
     get_model_list,
-    completion_with_split_tests
+    completion_with_split_tests,
+    get_max_tokens
 )
 from .main import *  # type: ignore
 from .integrations import *
diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index c2b76dce9..e68c4da03 100644
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 5be8c1fc5..4e3285a90 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 2826ca27f..309ff5b2e 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/exceptions.py b/litellm/exceptions.py
index 26a6e8b9f..ccf6e2d76 100644
--- a/litellm/exceptions.py
+++ b/litellm/exceptions.py
@@ -9,10 +9,11 @@ from openai.error import (


 class AuthenticationError(AuthenticationError):  # type: ignore
-    def __init__(self, message, llm_provider):
+    def __init__(self, message, llm_provider, model):
         self.status_code = 401
         self.message = message
         self.llm_provider = llm_provider
+        self.model = model
         super().__init__(
             self.message
         )  # Call the base class constructor with the parameters it needs
@@ -41,20 +42,22 @@ class ContextWindowExceededError(InvalidRequestError):  # type: ignore


 class RateLimitError(RateLimitError):  # type: ignore
-    def __init__(self, message, llm_provider):
+    def __init__(self, message, llm_provider, model):
         self.status_code = 429
         self.message = message
         self.llm_provider = llm_provider
+        self.model = model
         super().__init__(
             self.message
         )  # Call the base class constructor with the parameters it needs


 class ServiceUnavailableError(ServiceUnavailableError):  # type: ignore
-    def __init__(self, message, llm_provider):
+    def __init__(self, message, llm_provider, model):
         self.status_code = 500
         self.message = message
         self.llm_provider = llm_provider
+        self.model = model
         super().__init__(
             self.message
         )  # Call the base class constructor with the parameters it needs
diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py
index d8619abd6..dc89d160e 100644
--- a/litellm/tests/test_exceptions.py
+++ b/litellm/tests/test_exceptions.py
@@ -22,7 +22,7 @@ import pytest

 litellm.vertex_project = "pathrise-convert-1606954137718"
 litellm.vertex_location = "us-central1"
-litellm.failure_callback = ["sentry"]
+# litellm.failure_callback = ["sentry"]

 #### What this tests ####
 # This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type
diff --git a/litellm/tests/test_get_model_cost_map.py b/litellm/tests/test_get_model_cost_map.py
new file mode 100644
index 000000000..e8d751af8
--- /dev/null
+++ b/litellm/tests/test_get_model_cost_map.py
@@ -0,0 +1,10 @@
+import sys, os
+import traceback
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import time
+from litellm import get_max_tokens
+
+print(get_max_tokens("gpt-3.5-turbo"))
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index 2ca755b94..d611c05ed 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -846,6 +846,13 @@ def get_optional_params(  # use the openai defaults
     return optional_params


+def get_max_tokens(model: str):
+    try:
+        return litellm.model_cost[model]
+    except:
+        raise Exception("This model isn't mapped yet. Add it here - https://raw.githubusercontent.com/BerriAI/litellm/main/cookbook/community-resources/max_tokens.json")
+
+
 def load_test_model(
     model: str,
     custom_llm_provider: str = "",
@@ -1458,6 +1465,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise AuthenticationError(
                     message=f"AnthropicException - {original_exception.message}",
                     llm_provider="anthropic",
+                    model=model
                 )
             elif original_exception.status_code == 400:
                 exception_mapping_worked = True
@@ -1478,6 +1486,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise RateLimitError(
                     message=f"AnthropicException - {original_exception.message}",
                     llm_provider="anthropic",
+                    model=model
                 )
             elif (
                 "Could not resolve authentication method. Expected either api_key or auth_token to be set."
@@ -1487,6 +1496,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise AuthenticationError(
                     message=f"AnthropicException - {original_exception.message}",
                     llm_provider="anthropic",
+                    model=model
                 )
         elif "replicate" in model:
             if "Incorrect authentication token" in error_str:
@@ -1494,6 +1504,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise AuthenticationError(
                     message=f"ReplicateException - {error_str}",
                     llm_provider="replicate",
+                    model=model
                 )
             elif "input is too long" in error_str:
                 exception_mapping_worked = True
@@ -1514,6 +1525,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise RateLimitError(
                     message=f"ReplicateException - {error_str}",
                     llm_provider="replicate",
+                    model=model
                 )
             elif (
                 exception_type == "ReplicateError"
@@ -1521,6 +1533,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise ServiceUnavailableError(
                     message=f"ReplicateException - {error_str}",
                     llm_provider="replicate",
+                    model=model
                 )
         elif model in litellm.cohere_models:  # Cohere
             if (
@@ -1531,6 +1544,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise AuthenticationError(
                     message=f"CohereException - {original_exception.message}",
                     llm_provider="cohere",
+                    model=model
                 )
             elif "too many tokens" in error_str:
                 exception_mapping_worked = True
@@ -1546,6 +1560,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise RateLimitError(
                     message=f"CohereException - {original_exception.message}",
                     llm_provider="cohere",
+                    model=model
                 )
         elif custom_llm_provider == "huggingface":
             if "length limit exceeded" in error_str:
@@ -1561,6 +1576,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise AuthenticationError(
                     message=f"HuggingfaceException - {original_exception.message}",
                     llm_provider="huggingface",
+                    model=model
                 )
             elif original_exception.status_code == 400:
                 exception_mapping_worked = True
@@ -1574,6 +1590,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise RateLimitError(
                     message=f"HuggingfaceException - {original_exception.message}",
                     llm_provider="huggingface",
+                    model=model
                 )
         elif custom_llm_provider == "ai21":
             if hasattr(original_exception, "message"):
@@ -1590,6 +1607,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise AuthenticationError(
                     message=f"AI21Exception - {original_exception.message}",
                     llm_provider="ai21",
+                    model=model
                 )
             if original_exception.status_code == 422:
                 exception_mapping_worked = True
@@ -1617,7 +1635,8 @@ def exception_type(model, original_exception, custom_llm_provider):
                 exception_mapping_worked = True
                 raise AuthenticationError(
                     message=f"TogetherAIException - {error_response['error']}",
-                    llm_provider="together_ai"
+                    llm_provider="together_ai",
+                    model=model
                 )
             elif "error" in error_response and "INVALID_ARGUMENT" in error_response["error"]:
                 exception_mapping_worked = True
@@ -1638,6 +1657,7 @@ def exception_type(model, original_exception, custom_llm_provider):
                 raise RateLimitError(
                     message=f"TogetherAIException - {original_exception.message}",
                     llm_provider="together_ai",
+                    model=model
                 )
         raise original_exception  # base case - return the original exception
     else:
diff --git a/pyproject.toml b/pyproject.toml
index 7a2d0b33b..cf6b93ce8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.525"
+version = "0.1.526"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
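
Not part of the patch: a minimal usage sketch of the dynamic cost map and the new get_max_tokens helper introduced above. The model name, the printed fields, and the None guard are illustrative assumptions; the entry schema is assumed to match the old in-repo model_cost dict (max_tokens, input_cost_per_token, output_cost_per_token).

# Illustrative only -- assumes litellm >= 0.1.526 is installed and the remote
# max_tokens.json is reachable at import time.
import litellm
from litellm import get_max_tokens

# model_cost is now fetched over HTTP when litellm is imported; per the diff it
# can be None if the request failed, so guard before indexing it directly.
if litellm.model_cost is not None:
    print(len(litellm.model_cost), "models in the cost map")

# get_max_tokens returns the full cost entry for a mapped model and raises an
# Exception pointing at the community JSON for unmapped models.
info = get_max_tokens("gpt-3.5-turbo")  # example model name, assumed to be mapped
print(info["max_tokens"])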