mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00

update exception mapping and get model cost map

This commit is contained in:
  parent e03d442e8f
  commit 73bb1b96e9

9 changed files with 54 additions and 96 deletions
@@ -1,4 +1,4 @@
-import threading
+import threading, requests
 from typing import Callable, List, Optional, Dict
 from litellm.caching import Cache
 
@@ -35,94 +35,18 @@ caching = False # deprecated soon
 caching_with_models = False # if you want the caching key to be model + prompt # deprecated soon
 cache: Optional[Cache] = None # cache object
 model_alias_map: Dict[str, str] = {}
-model_cost = {
-    "babbage-002": {
-        "max_tokens": 16384,
-        "input_cost_per_token": 0.0000004,
-        "output_cost_per_token": 0.0000004,
-    },
-    "davinci-002": {
-        "max_tokens": 16384,
-        "input_cost_per_token": 0.000002,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-3.5-turbo": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-35-turbo": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    },  # azure model name
-    "gpt-3.5-turbo-0613": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-3.5-turbo-0301": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-3.5-turbo-16k": {
-        "max_tokens": 16000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000004,
-    },
-    "gpt-35-turbo-16k": {
-        "max_tokens": 16000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000004,
-    },  # azure model name
-    "gpt-3.5-turbo-16k-0613": {
-        "max_tokens": 16000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000004,
-    },
-    "gpt-4": {
-        "max_tokens": 8000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.00006,
-    },
-    "gpt-4-0613": {
-        "max_tokens": 8000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.00006,
-    },
-    "gpt-4-32k": {
-        "max_tokens": 8000,
-        "input_cost_per_token": 0.00006,
-        "output_cost_per_token": 0.00012,
-    },
-    "claude-instant-1": {
-        "max_tokens": 100000,
-        "input_cost_per_token": 0.00000163,
-        "output_cost_per_token": 0.00000551,
-    },
-    "claude-2": {
-        "max_tokens": 100000,
-        "input_cost_per_token": 0.00001102,
-        "output_cost_per_token": 0.00003268,
-    },
-    "text-bison-001": {
-        "max_tokens": 8192,
-        "input_cost_per_token": 0.000004,
-        "output_cost_per_token": 0.000004,
-    },
-    "chat-bison-001": {
-        "max_tokens": 4096,
-        "input_cost_per_token": 0.000002,
-        "output_cost_per_token": 0.000002,
-    },
-    "command-nightly": {
-        "max_tokens": 4096,
-        "input_cost_per_token": 0.000015,
-        "output_cost_per_token": 0.000015,
-    },
-}
+def get_model_cost_map():
+    url = "https://raw.githubusercontent.com/BerriAI/litellm/main/cookbook/community-resources/max_tokens.json"
+
+    try:
+        response = requests.get(url)
+        response.raise_for_status()  # Raise an exception if request is unsuccessful
+        content = response.json()
+        return content
+    except requests.exceptions.RequestException as e:
+        print("Error occurred:", e)
+        return None
+
+model_cost = get_model_cost_map()
 
 
 ####### THREAD-SPECIFIC DATA ###################
 class MyLocal(threading.local):
@@ -298,7 +222,8 @@ from .utils import (
     Logging,
     acreate,
     get_model_list,
-    completion_with_split_tests
+    completion_with_split_tests,
+    get_max_tokens
 )
 from .main import * # type: ignore
 from .integrations import *
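The hard-coded pricing dict above is replaced by a runtime fetch of the community-maintained max_tokens.json. Below is a minimal standalone sketch of that fetch pattern with a local fallback added for illustration; the DEFAULT_COST_MAP name, the timeout, and the fallback behavior are assumptions, not part of this commit, which simply returns None on failure.

import requests

# Hypothetical offline fallback -- illustrative only; the commit itself
# returns None when the request fails, leaving model_cost unset.
DEFAULT_COST_MAP = {
    "gpt-3.5-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
}

COST_MAP_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/cookbook/community-resources/max_tokens.json"

def get_model_cost_map(url: str = COST_MAP_URL):
    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()  # raise on 4xx/5xx instead of parsing an error page
        return response.json()
    except requests.exceptions.RequestException as e:
        print("Error occurred:", e)
        return DEFAULT_COST_MAP

model_cost = get_model_cost_map()
print(model_cost["gpt-3.5-turbo"]["max_tokens"])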
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -9,10 +9,11 @@ from openai.error import (
 
 
 class AuthenticationError(AuthenticationError): # type: ignore
-    def __init__(self, message, llm_provider):
+    def __init__(self, message, llm_provider, model):
         self.status_code = 401
         self.message = message
         self.llm_provider = llm_provider
+        self.model = model
         super().__init__(
             self.message
         ) # Call the base class constructor with the parameters it needs
@@ -41,20 +42,22 @@ class ContextWindowExceededError(InvalidRequestError): # type: ignore
 
 
 class RateLimitError(RateLimitError): # type: ignore
-    def __init__(self, message, llm_provider):
+    def __init__(self, message, llm_provider, model):
         self.status_code = 429
         self.message = message
         self.llm_provider = llm_provider
+        self.model = model
         super().__init__(
             self.message
         ) # Call the base class constructor with the parameters it needs
 
 
 class ServiceUnavailableError(ServiceUnavailableError): # type: ignore
-    def __init__(self, message, llm_provider):
+    def __init__(self, message, llm_provider, model):
         self.status_code = 500
         self.message = message
         self.llm_provider = llm_provider
+        self.model = model
         super().__init__(
             self.message
         ) # Call the base class constructor with the parameters it needs
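With model threaded through the constructors, a caller can recover the offending model from any mapped exception. A hedged sketch of constructing and inspecting one of these classes; the litellm.exceptions import path is an assumption based on the file's contents, and the message text is illustrative.

from litellm.exceptions import RateLimitError

try:
    # construct directly, the way exception_type() does for provider 429s
    raise RateLimitError(
        message="AnthropicException - rate limited",
        llm_provider="anthropic",
        model="claude-2",
    )
except RateLimitError as e:
    # each class pins its own status_code (401, 429, 500, ...)
    print(e.status_code, e.llm_provider, e.model)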
@@ -22,7 +22,7 @@ import pytest
 litellm.vertex_project = "pathrise-convert-1606954137718"
 litellm.vertex_location = "us-central1"
 
-litellm.failure_callback = ["sentry"]
+# litellm.failure_callback = ["sentry"]
 #### What this tests ####
 # This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type
10
litellm/tests/test_get_model_cost_map.py
Normal file
@@ -0,0 +1,10 @@
+import sys, os
+import traceback
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import time
+from litellm import get_max_tokens
+
+print(get_max_tokens("gpt-3.5-turbo"))
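The new file is a smoke test that prints rather than asserts. A sketch of an assertion-style equivalent, assuming the remote cost map loaded and that entries keep the shape of the removed dict; the test name and assertions are illustrative, not part of the commit.

import sys, os

sys.path.insert(0, os.path.abspath("../.."))  # run from litellm/tests/
from litellm import get_max_tokens

def test_gpt35_turbo_is_mapped():
    entry = get_max_tokens("gpt-3.5-turbo")
    # each cost-map entry carries a token limit plus per-token prices
    assert entry["max_tokens"] > 0
    assert entry["input_cost_per_token"] > 0
    assert entry["output_cost_per_token"] > 0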
@@ -846,6 +846,13 @@ def get_optional_params( # use the openai defaults
     return optional_params
 
 
+def get_max_tokens(model: str):
+    try:
+        return litellm.model_cost[model]
+    except:
+        raise Exception("This model isn't mapped yet. Add it here - https://raw.githubusercontent.com/BerriAI/litellm/main/cookbook/community-resources/max_tokens.json")
+
+
 def load_test_model(
     model: str,
     custom_llm_provider: str = "",
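Note that despite its name, get_max_tokens returns the model's whole cost-map entry rather than an integer, so callers index into it. A brief usage sketch; the printed values assume the remote map matches the dict removed above, and "unmapped-model" is a deliberately fake name.

from litellm import get_max_tokens

info = get_max_tokens("gpt-3.5-turbo")
print(info["max_tokens"])            # e.g. 4000
print(info["input_cost_per_token"])  # e.g. 1.5e-06

try:
    get_max_tokens("unmapped-model")
except Exception as e:
    print(e)  # "This model isn't mapped yet. Add it here - ..."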
@@ -1458,6 +1465,7 @@ def exception_type(model, original_exception, custom_llm_provider):
             raise AuthenticationError(
                 message=f"AnthropicException - {original_exception.message}",
                 llm_provider="anthropic",
+                model=model
             )
         elif original_exception.status_code == 400:
             exception_mapping_worked = True
@@ -1478,6 +1486,7 @@ def exception_type(model, original_exception, custom_llm_provider):
             raise RateLimitError(
                 message=f"AnthropicException - {original_exception.message}",
                 llm_provider="anthropic",
+                model=model
             )
         elif (
             "Could not resolve authentication method. Expected either api_key or auth_token to be set."
@@ -1487,6 +1496,7 @@ def exception_type(model, original_exception, custom_llm_provider):
             raise AuthenticationError(
                 message=f"AnthropicException - {original_exception.message}",
                 llm_provider="anthropic",
+                model=model
             )
     elif "replicate" in model:
         if "Incorrect authentication token" in error_str:
@@ -1494,6 +1504,7 @@ def exception_type(model, original_exception, custom_llm_provider):
             raise AuthenticationError(
                 message=f"ReplicateException - {error_str}",
                 llm_provider="replicate",
+                model=model
             )
         elif "input is too long" in error_str:
             exception_mapping_worked = True
@@ -1514,6 +1525,7 @@ def exception_type(model, original_exception, custom_llm_provider):
             raise RateLimitError(
                 message=f"ReplicateException - {error_str}",
                 llm_provider="replicate",
+                model=model
             )
         elif (
             exception_type == "ReplicateError"
@@ -1521,6 +1533,7 @@ def exception_type(model, original_exception, custom_llm_provider):
             raise ServiceUnavailableError(
                 message=f"ReplicateException - {error_str}",
                 llm_provider="replicate",
+                model=model
             )
     elif model in litellm.cohere_models: # Cohere
         if (
@@ -1531,6 +1544,7 @@ def exception_type(model, original_exception, custom_llm_provider):
             raise AuthenticationError(
                 message=f"CohereException - {original_exception.message}",
                 llm_provider="cohere",
+                model=model
             )
         elif "too many tokens" in error_str:
             exception_mapping_worked = True
@@ -1546,6 +1560,7 @@ def exception_type(model, original_exception, custom_llm_provider):
             raise RateLimitError(
                 message=f"CohereException - {original_exception.message}",
                 llm_provider="cohere",
+                model=model
             )
     elif custom_llm_provider == "huggingface":
         if "length limit exceeded" in error_str:
@@ -1561,6 +1576,7 @@ def exception_type(model, original_exception, custom_llm_provider):
             raise AuthenticationError(
                 message=f"HuggingfaceException - {original_exception.message}",
                 llm_provider="huggingface",
+                model=model
             )
         elif original_exception.status_code == 400:
             exception_mapping_worked = True
@@ -1574,6 +1590,7 @@ def exception_type(model, original_exception, custom_llm_provider):
             raise RateLimitError(
                 message=f"HuggingfaceException - {original_exception.message}",
                 llm_provider="huggingface",
+                model=model
             )
     elif custom_llm_provider == "ai21":
         if hasattr(original_exception, "message"):
@@ -1590,6 +1607,7 @@ def exception_type(model, original_exception, custom_llm_provider):
             raise AuthenticationError(
                 message=f"AI21Exception - {original_exception.message}",
                 llm_provider="ai21",
+                model=model
             )
         if original_exception.status_code == 422:
             exception_mapping_worked = True
@@ -1617,7 +1635,8 @@ def exception_type(model, original_exception, custom_llm_provider):
             exception_mapping_worked = True
             raise AuthenticationError(
                 message=f"TogetherAIException - {error_response['error']}",
-                llm_provider="together_ai"
+                llm_provider="together_ai",
+                model=model
             )
         elif "error" in error_response and "INVALID_ARGUMENT" in error_response["error"]:
             exception_mapping_worked = True
@@ -1638,6 +1657,7 @@ def exception_type(model, original_exception, custom_llm_provider):
             raise RateLimitError(
                 message=f"TogetherAIException - {original_exception.message}",
                 llm_provider="together_ai",
+                model=model
             )
         raise original_exception # base case - return the original exception
     else:
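Every provider branch of exception_type now forwards model=model, so code catching a mapped error can log which model failed without parsing the message string. A caller-side sketch; the prompt and model name are illustrative.

from litellm import completion
from litellm.exceptions import AuthenticationError, RateLimitError

try:
    completion(model="claude-2", messages=[{"role": "user", "content": "hi"}])
except AuthenticationError as e:
    print(f"bad credentials for {e.llm_provider}/{e.model}")
except RateLimitError as e:
    print(f"rate limited on {e.model}; back off and retry")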
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.525"
+version = "0.1.526"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"