update exception mapping and get model cost map

Krrish Dholakia 2023-09-04 11:53:18 -07:00
parent e03d442e8f
commit 73bb1b96e9
9 changed files with 54 additions and 96 deletions

View file

@@ -1,4 +1,4 @@
-import threading
+import threading, requests
 from typing import Callable, List, Optional, Dict
 from litellm.caching import Cache
@@ -35,94 +35,18 @@ caching = False # deprecated soon
 caching_with_models = False # if you want the caching key to be model + prompt # deprecated soon
 cache: Optional[Cache] = None # cache object
 model_alias_map: Dict[str, str] = {}
-model_cost = {
-    "babbage-002": {
-        "max_tokens": 16384,
-        "input_cost_per_token": 0.0000004,
-        "output_cost_per_token": 0.0000004,
-    },
-    "davinci-002": {
-        "max_tokens": 16384,
-        "input_cost_per_token": 0.000002,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-3.5-turbo": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-35-turbo": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    }, # azure model name
-    "gpt-3.5-turbo-0613": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-3.5-turbo-0301": {
-        "max_tokens": 4000,
-        "input_cost_per_token": 0.0000015,
-        "output_cost_per_token": 0.000002,
-    },
-    "gpt-3.5-turbo-16k": {
-        "max_tokens": 16000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000004,
-    },
-    "gpt-35-turbo-16k": {
-        "max_tokens": 16000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000004,
-    }, # azure model name
-    "gpt-3.5-turbo-16k-0613": {
-        "max_tokens": 16000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.000004,
-    },
-    "gpt-4": {
-        "max_tokens": 8000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.00006,
-    },
-    "gpt-4-0613": {
-        "max_tokens": 8000,
-        "input_cost_per_token": 0.000003,
-        "output_cost_per_token": 0.00006,
-    },
-    "gpt-4-32k": {
-        "max_tokens": 8000,
-        "input_cost_per_token": 0.00006,
-        "output_cost_per_token": 0.00012,
-    },
-    "claude-instant-1": {
-        "max_tokens": 100000,
-        "input_cost_per_token": 0.00000163,
-        "output_cost_per_token": 0.00000551,
-    },
-    "claude-2": {
-        "max_tokens": 100000,
-        "input_cost_per_token": 0.00001102,
-        "output_cost_per_token": 0.00003268,
-    },
-    "text-bison-001": {
-        "max_tokens": 8192,
-        "input_cost_per_token": 0.000004,
-        "output_cost_per_token": 0.000004,
-    },
-    "chat-bison-001": {
-        "max_tokens": 4096,
-        "input_cost_per_token": 0.000002,
-        "output_cost_per_token": 0.000002,
-    },
-    "command-nightly": {
-        "max_tokens": 4096,
-        "input_cost_per_token": 0.000015,
-        "output_cost_per_token": 0.000015,
-    },
-}
+def get_model_cost_map():
+    url = "https://raw.githubusercontent.com/BerriAI/litellm/main/cookbook/community-resources/max_tokens.json"
+
+    try:
+        response = requests.get(url)
+        response.raise_for_status()  # Raise an exception if request is unsuccessful
+        content = response.json()
+        return content
+    except requests.exceptions.RequestException as e:
+        print("Error occurred:", e)
+        return None
+model_cost = get_model_cost_map()
 ####### THREAD-SPECIFIC DATA ###################
 class MyLocal(threading.local):
@@ -298,7 +222,8 @@ from .utils import (
     Logging,
     acreate,
     get_model_list,
-    completion_with_split_tests
+    completion_with_split_tests,
+    get_max_tokens
 )
 from .main import * # type: ignore
 from .integrations import *
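For illustration, a minimal sketch (not code from this commit) of how the remotely fetched map might be consumed for cost estimation, assuming the hosted JSON keeps the same per-model keys as the removed dict; estimate_cost is a hypothetical helper, and the None check reflects that get_model_cost_map() returns None when the request fails:

import litellm

def estimate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> float:
    # Hypothetical helper: looks up per-token prices in the remotely fetched map.
    if litellm.model_cost is None:  # get_model_cost_map() returns None on request failure
        raise RuntimeError("model cost map unavailable - check network access")
    entry = litellm.model_cost[model]
    return (prompt_tokens * entry["input_cost_per_token"]
            + completion_tokens * entry["output_cost_per_token"])

print(estimate_cost("gpt-3.5-turbo", prompt_tokens=1000, completion_tokens=200))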

View file

@@ -9,10 +9,11 @@ from openai.error import (
 class AuthenticationError(AuthenticationError): # type: ignore
-    def __init__(self, message, llm_provider):
+    def __init__(self, message, llm_provider, model):
         self.status_code = 401
         self.message = message
         self.llm_provider = llm_provider
+        self.model = model
         super().__init__(
             self.message
         ) # Call the base class constructor with the parameters it needs
@@ -41,20 +42,22 @@ class ContextWindowExceededError(InvalidRequestError): # type: ignore
 class RateLimitError(RateLimitError): # type: ignore
-    def __init__(self, message, llm_provider):
+    def __init__(self, message, llm_provider, model):
         self.status_code = 429
         self.message = message
         self.llm_provider = llm_provider
+        self.model = model
         super().__init__(
             self.message
         ) # Call the base class constructor with the parameters it needs

 class ServiceUnavailableError(ServiceUnavailableError): # type: ignore
-    def __init__(self, message, llm_provider):
+    def __init__(self, message, llm_provider, model):
         self.status_code = 500
         self.message = message
         self.llm_provider = llm_provider
+        self.model = model
         super().__init__(
             self.message
         ) # Call the base class constructor with the parameters it needs
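For illustration, a hedged sketch (not code from this commit) of how a caller might use the new model attribute, assuming completion() re-raises these mapped exceptions as the exception-mapping test below exercises:

from litellm import completion
from litellm.exceptions import AuthenticationError, RateLimitError

try:
    response = completion(model="claude-instant-1", messages=[{"role": "user", "content": "Hey"}])
except AuthenticationError as e:
    # e.model is the attribute added in this commit, alongside e.llm_provider
    print(e.status_code, e.llm_provider, e.model)
except RateLimitError as e:
    print(f"rate limited on {e.model} via {e.llm_provider}; retry later")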

View file

@@ -22,7 +22,7 @@ import pytest
 litellm.vertex_project = "pathrise-convert-1606954137718"
 litellm.vertex_location = "us-central1"
-litellm.failure_callback = ["sentry"]
+# litellm.failure_callback = ["sentry"]
 #### What this tests ####
 # This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type

View file

@@ -0,0 +1,10 @@
+import sys, os
+import traceback
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
+import time
+from litellm import get_max_tokens
+
+print(get_max_tokens("gpt-3.5-turbo"))
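For illustration, note that get_max_tokens as implemented in utils.py below returns the full cost-map entry (a dict), not a bare integer, and raises for unmapped models; a hedged sketch of guarding for both, where the 4000-token fallback is an assumed default:

from litellm import get_max_tokens

try:
    context_window = get_max_tokens("gpt-3.5-turbo")["max_tokens"]  # the entry is a dict
except Exception:
    context_window = 4000  # assumed fallback for models not yet in the cost map
print(context_window)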

View file

@@ -846,6 +846,13 @@ def get_optional_params( # use the openai defaults
     return optional_params

+def get_max_tokens(model: str):
+    try:
+        return litellm.model_cost[model]
+    except:
+        raise Exception("This model isn't mapped yet. Add it here - https://raw.githubusercontent.com/BerriAI/litellm/main/cookbook/community-resources/max_tokens.json")
+
 def load_test_model(
     model: str,
     custom_llm_provider: str = "",
@@ -1458,6 +1465,7 @@ def exception_type(model, original_exception, custom_llm_provider):
     raise AuthenticationError(
         message=f"AnthropicException - {original_exception.message}",
         llm_provider="anthropic",
+        model=model
     )
 elif original_exception.status_code == 400:
     exception_mapping_worked = True
@@ -1478,6 +1486,7 @@ def exception_type(model, original_exception, custom_llm_provider):
     raise RateLimitError(
         message=f"AnthropicException - {original_exception.message}",
         llm_provider="anthropic",
+        model=model
     )
 elif (
     "Could not resolve authentication method. Expected either api_key or auth_token to be set."
@@ -1487,6 +1496,7 @@ def exception_type(model, original_exception, custom_llm_provider):
     raise AuthenticationError(
         message=f"AnthropicException - {original_exception.message}",
         llm_provider="anthropic",
+        model=model
     )
 elif "replicate" in model:
     if "Incorrect authentication token" in error_str:
@@ -1494,6 +1504,7 @@ def exception_type(model, original_exception, custom_llm_provider):
     raise AuthenticationError(
         message=f"ReplicateException - {error_str}",
         llm_provider="replicate",
+        model=model
     )
 elif "input is too long" in error_str:
     exception_mapping_worked = True
@@ -1514,6 +1525,7 @@ def exception_type(model, original_exception, custom_llm_provider):
     raise RateLimitError(
         message=f"ReplicateException - {error_str}",
         llm_provider="replicate",
+        model=model
     )
 elif (
     exception_type == "ReplicateError"
@@ -1521,6 +1533,7 @@ def exception_type(model, original_exception, custom_llm_provider):
     raise ServiceUnavailableError(
         message=f"ReplicateException - {error_str}",
         llm_provider="replicate",
+        model=model
     )
 elif model in litellm.cohere_models: # Cohere
     if (
@@ -1531,6 +1544,7 @@ def exception_type(model, original_exception, custom_llm_provider):
     raise AuthenticationError(
         message=f"CohereException - {original_exception.message}",
         llm_provider="cohere",
+        model=model
     )
 elif "too many tokens" in error_str:
     exception_mapping_worked = True
@@ -1546,6 +1560,7 @@ def exception_type(model, original_exception, custom_llm_provider):
     raise RateLimitError(
         message=f"CohereException - {original_exception.message}",
         llm_provider="cohere",
+        model=model
     )
 elif custom_llm_provider == "huggingface":
     if "length limit exceeded" in error_str:
@@ -1561,6 +1576,7 @@ def exception_type(model, original_exception, custom_llm_provider):
     raise AuthenticationError(
         message=f"HuggingfaceException - {original_exception.message}",
         llm_provider="huggingface",
+        model=model
     )
 elif original_exception.status_code == 400:
     exception_mapping_worked = True
@@ -1574,6 +1590,7 @@ def exception_type(model, original_exception, custom_llm_provider):
     raise RateLimitError(
         message=f"HuggingfaceException - {original_exception.message}",
         llm_provider="huggingface",
+        model=model
     )
 elif custom_llm_provider == "ai21":
     if hasattr(original_exception, "message"):
@@ -1590,6 +1607,7 @@ def exception_type(model, original_exception, custom_llm_provider):
     raise AuthenticationError(
         message=f"AI21Exception - {original_exception.message}",
         llm_provider="ai21",
+        model=model
     )
 if original_exception.status_code == 422:
     exception_mapping_worked = True
@@ -1617,7 +1635,8 @@ def exception_type(model, original_exception, custom_llm_provider):
     exception_mapping_worked = True
     raise AuthenticationError(
         message=f"TogetherAIException - {error_response['error']}",
-        llm_provider="together_ai"
+        llm_provider="together_ai",
+        model=model
     )
 elif "error" in error_response and "INVALID_ARGUMENT" in error_response["error"]:
     exception_mapping_worked = True
@@ -1638,6 +1657,7 @@ def exception_type(model, original_exception, custom_llm_provider):
     raise RateLimitError(
         message=f"TogetherAIException - {original_exception.message}",
         llm_provider="together_ai",
+        model=model
     )
 raise original_exception # base case - return the original exception
 else:
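For illustration, a hypothetical wrapper (not part of litellm) showing one pattern the mapping enables: retrying only on the mapped RateLimitError while other mapped exceptions, such as AuthenticationError, propagate to the caller unchanged:

import time
from litellm import completion
from litellm.exceptions import RateLimitError

def completion_with_backoff(max_retries: int = 3, **kwargs):
    # Retry with exponential backoff only when the provider error maps to RateLimitError.
    for attempt in range(max_retries):
        try:
            return completion(**kwargs)
        except RateLimitError as e:
            wait = 2 ** attempt
            print(f"{e.llm_provider}/{e.model} rate limited, retrying in {wait}s")
            time.sleep(wait)
    raise RuntimeError("exhausted retries")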

View file

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.525"
+version = "0.1.526"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"