forked from phoenix/litellm-mirror

commit 50c2c48cc9 ("add community resources")
parent 499d626c76
10 changed files with 63 additions and 29 deletions
community_resources/max_tokens.json (new file, 16 lines)
@@ -0,0 +1,16 @@
+{"MODEL_COST":{
+    "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
+    "gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
+    "gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
+    "gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
+    "gpt-4-32k": {"max_tokens": 8000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
+    "claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
+    "claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
+    "text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
+    "chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
+    "command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
+    "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1": {"max_tokens": 4096, "input_cost_per_token": 0.00000608, "output_cost_per_token": 0.00000608},
+}}
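Usage note: the table above is plain data, and a consumer can turn it into a per-call cost estimate in a few lines. A minimal sketch, assuming the repo-relative path and a hypothetical estimate_cost helper, and assuming the file parses as strict JSON (the trailing comma on the last MODEL_COST entry would have to be dropped for json.load to accept it):

import json

# Load the community cost table added in this commit
# (path is repo-relative; adjust if you vendor the file elsewhere).
with open("community_resources/max_tokens.json") as f:
    MODEL_COST = json.load(f)["MODEL_COST"]

def estimate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> float:
    """Hypothetical helper: rough dollar cost of one call, based on the table above."""
    info = MODEL_COST[model]
    return (prompt_tokens * info["input_cost_per_token"]
            + completion_tokens * info["output_cost_per_token"])

# e.g. 1,000 prompt tokens + 500 completion tokens on gpt-3.5-turbo
# -> 1000 * 0.0000015 + 500 * 0.000002 = 0.0025
print(estimate_cost("gpt-3.5-turbo", 1000, 500))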
litellm/__init__.py
@@ -9,6 +9,20 @@ azure_key = None
 anthropic_key = None
 replicate_key = None
 cohere_key = None
+MAX_TOKENS = {
+    'gpt-3.5-turbo': 4000,
+    'gpt-3.5-turbo-0613': 4000,
+    'gpt-3.5-turbo-0301': 4000,
+    'gpt-3.5-turbo-16k': 16000,
+    'gpt-3.5-turbo-16k-0613': 16000,
+    'gpt-4': 8000,
+    'gpt-4-0613': 8000,
+    'gpt-4-32k': 32000,
+    'claude-instant-1': 100000,
+    'claude-2': 100000,
+    'command-nightly': 4096,
+    'replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1': 4096,
+}
 ####### PROXY PARAMS ################### configurable params if you use proxy models like Helicone
 api_base = None
 headers = None
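A short sketch of how calling code might use the new module-level litellm.MAX_TOKENS lookup to pick a model before calling completion; the character-based token estimate is a stand-in for illustration, not a litellm API:

import litellm
from litellm import completion

def fits_context(model: str, prompt: str) -> bool:
    # Crude stand-in for a real tokenizer: roughly 4 characters per token.
    approx_tokens = len(prompt) // 4
    # MAX_TOKENS is the dict added to litellm/__init__.py above.
    return approx_tokens < litellm.MAX_TOKENS.get(model, 4000)

prompt = "how does a court case get to the Supreme Court?"
model = "gpt-3.5-turbo" if fits_context("gpt-3.5-turbo", prompt) else "gpt-3.5-turbo-16k"
response = completion(model=model, messages=[{"role": "user", "content": prompt}])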
BIN litellm/__pycache__/__init__.cpython-311.pyc (new file): Binary file not shown.
BIN litellm/__pycache__/main.cpython-311.pyc (new file): Binary file not shown.
BIN litellm/__pycache__/timeout.cpython-311.pyc (new file): Binary file not shown.
BIN litellm/__pycache__/utils.cpython-311.pyc (new file): Binary file not shown.
litellm/main.py
@@ -74,7 +74,7 @@ async def acompletion(*args, **kwargs):
     return await loop.run_in_executor(None, func)
 
 @client
-@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(2), reraise=True, retry_error_callback=lambda retry_state: setattr(retry_state.outcome, 'retry_variable', litellm.retry)) # retry call, turn this off by setting `litellm.retry = False`
+# @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(2), reraise=True, retry_error_callback=lambda retry_state: setattr(retry_state.outcome, 'retry_variable', litellm.retry)) # retry call, turn this off by setting `litellm.retry = False`
 @timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
 def completion(
     model, messages, # required params
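With the @retry decorator commented out, completion no longer retries failed calls on its own. A caller who still wants that behaviour could wrap the call with tenacity themselves, reusing the same parameters the removed decorator used; this wrapper is an illustration, not part of litellm:

from tenacity import retry, stop_after_attempt, wait_random_exponential
from litellm import completion

# Same backoff and attempt settings as the decorator this commit disables.
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(2), reraise=True)
def completion_with_retries(**kwargs):
    return completion(**kwargs)

response = completion_with_retries(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)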
@@ -255,8 +255,8 @@ def completion(
     elif model in litellm.cohere_models:
         if api_key:
             cohere_key = api_key
-        elif litellm.api_key:
-            cohere_key = litellm.api_key
+        elif litellm.cohere_key:
+            cohere_key = litellm.cohere_key
         else:
             cohere_key = os.environ.get("COHERE_API_KEY")
         co = cohere.Client(cohere_key)
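This fix means the module-level litellm.cohere_key declared in __init__.py is now actually consulted for Cohere models, instead of the generic litellm.api_key. A minimal sketch of the resulting precedence (the key string is a placeholder):

import os
import litellm
from litellm import completion

# Either of these now works for Cohere models; a per-call api_key still wins.
litellm.cohere_key = "co-..."                 # placeholder key
# os.environ["COHERE_API_KEY"] = "co-..."     # env-var fallback, unchanged

response = completion(
    model="command-nightly",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)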
@@ -330,6 +330,7 @@ def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None):
         logging(model=model, input=input, azure=azure, logger_fn=logger_fn, exception=e)
         ## Map to OpenAI Exception
         raise exception_type(model=model, original_exception=e)
+        raise e
 ####### HELPER FUNCTIONS ################
 ## Set verbose to true -> ```litellm.set_verbose = True```
 def print_verbose(print_statement):

BIN Binary file not shown.
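For context on the exception_type mapping used above: litellm re-raises provider errors as OpenAI-style exceptions, which is what the test changes below assert. A sketch from the caller's side, assuming the pre-1.0 openai package where these classes live in openai.error (the test file's own imports are not shown in this diff):

from openai.error import InvalidRequestError, OpenAIError
from litellm import completion

oversized = "how does a court case get to the Supreme Court?" * 100000
try:
    completion(model="command-nightly", messages=[{"role": "user", "content": oversized}])
except InvalidRequestError:
    # litellm maps provider errors to OpenAI-style exceptions, so one
    # except clause covers Cohere, Anthropic, Replicate, and so on.
    print("context window exceeded")
except OpenAIError as e:
    print(f"other OpenAI-style error: {e}")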
@@ -8,6 +8,7 @@ from litellm import embedding, completion
 from concurrent.futures import ThreadPoolExecutor
 import pytest
 
+# litellm.set_verbose = True
 #### What this tests ####
 # This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type
 
@@ -19,29 +20,32 @@ import pytest
 # Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered
 
 # models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly"]
-# # Test 1: Context Window Errors
-# @pytest.mark.parametrize("model", models)
-# def test_context_window(model):
-#     sample_text = "how does a court case get to the Supreme Court?" * 100000
-#     messages = [{"content": sample_text, "role": "user"}]
-#     try:
-#         azure = model == "chatgpt-test"
-#         print(f"model: {model}")
-#         response = completion(model=model, messages=messages, azure=azure)
-#     except InvalidRequestError:
-#         print("InvalidRequestError")
-#         return
-#     except OpenAIError:
-#         print("OpenAIError")
-#         return
-#     except Exception as e:
-#         print("Uncaught Error in test_context_window")
-#         # print(f"Error Type: {type(e).__name__}")
-#         print(f"Uncaught Exception - {e}")
-#         pytest.fail(f"Error occurred: {e}")
-#         return
-
-
+models = ["command-nightly"]
+def logging_fn(model_call_dict):
+    print(f"model_call_dict: {model_call_dict['model']}")
+# Test 1: Context Window Errors
+@pytest.mark.parametrize("model", models)
+def test_context_window(model):
+    sample_text = "how does a court case get to the Supreme Court?" * 100000
+    messages = [{"content": sample_text, "role": "user"}]
+    try:
+        azure = model == "chatgpt-test"
+        print(f"model: {model}")
+        response = completion(model=model, messages=messages, azure=azure, logger_fn=logging_fn)
+        print(f"response: {response}")
+    except InvalidRequestError:
+        print("InvalidRequestError")
+        return
+    except OpenAIError:
+        print("OpenAIError")
+        return
+    except Exception as e:
+        print("Uncaught Error in test_context_window")
+        # print(f"Error Type: {type(e).__name__}")
+        print(f"Uncaught Exception - {e}")
+        pytest.fail(f"Error occurred: {e}")
+        return
+test_context_window("command-nightly")
 # # Test 2: InvalidAuth Errors
 # def logger_fn(model_call_object: dict):
 #     print(f"model call details: {model_call_object}")
@@ -64,7 +68,7 @@ import pytest
 #         os.environ["REPLICATE_API_KEY"] = "bad-key"
 #         os.environ["REPLICATE_API_TOKEN"] = "bad-key"
 #         print(f"model: {model}")
-#         response = completion(model=model, messages=messages, azure=azure, logger_fn=logger_fn)
+#         response = completion(model=model, messages=messages, azure=azure)
 #         print(f"response: {response}")
 #     except AuthenticationError as e:
 #         return
@@ -75,7 +79,6 @@ import pytest
 #         pytest.fail(f"Error occurred: {e}")
 #         return
 
-
 # # Test 3: Rate Limit Errors
 # def test_model(model):
 #     try:
setup.py (2 lines changed)
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 
 setup(
     name='litellm',
-    version='0.1.230',
+    version='0.1.231',
     description='Library to easily interface with LLM API providers',
     author='BerriAI',
     packages=[