diff --git a/community_resources/max_tokens.json b/community_resources/max_tokens.json
new file mode 100644
index 000000000..e61239f2c
--- /dev/null
+++ b/community_resources/max_tokens.json
@@ -0,0 +1,16 @@
+{"MODEL_COST":{
+    "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
+    "gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
+    "gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
+    "gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
+    "gpt-4-32k": {"max_tokens": 8000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
+    "claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
+    "claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
+    "text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
+    "chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
+    "command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
+    "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1": {"max_tokens": 4096, "input_cost_per_token": 0.00000608, "output_cost_per_token": 0.00000608}
+}}
\ No newline at end of file
diff --git a/litellm/__init__.py b/litellm/__init__.py
index 785b1d293..3efd14343 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -9,6 +9,20 @@ azure_key = None
 anthropic_key = None
 replicate_key = None
 cohere_key = None
+MAX_TOKENS = {
+    'gpt-3.5-turbo': 4000,
+    'gpt-3.5-turbo-0613': 4000,
+    'gpt-3.5-turbo-0301': 4000,
+    'gpt-3.5-turbo-16k': 16000,
+    'gpt-3.5-turbo-16k-0613': 16000,
+    'gpt-4': 8000,
+    'gpt-4-0613': 8000,
+    'gpt-4-32k': 32000,
+    'claude-instant-1': 100000,
+    'claude-2': 100000,
+    'command-nightly': 4096,
+    'replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1': 4096,
+}
 ####### PROXY PARAMS ################### configurable params if you use proxy models like Helicone
 api_base = None
 headers = None
diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 000000000..eee4d9926
Binary files /dev/null and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
new file mode 100644
index 000000000..261f8e6ce
Binary files /dev/null and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/timeout.cpython-311.pyc b/litellm/__pycache__/timeout.cpython-311.pyc
new file mode 100644
index 000000000..c7211e4a7
Binary files /dev/null and b/litellm/__pycache__/timeout.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
new file mode 100644
index 000000000..f298d2988
Binary files /dev/null and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/main.py b/litellm/main.py
index 0d96073e7..09ac2f329 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -74,7 +74,7 @@ async def acompletion(*args, **kwargs):
     return await loop.run_in_executor(None, func)
 
 @client
-@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(2), reraise=True, retry_error_callback=lambda retry_state: setattr(retry_state.outcome, 'retry_variable', litellm.retry)) # retry call, turn this off by setting `litellm.retry = False`
+# @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(2), reraise=True, retry_error_callback=lambda retry_state: setattr(retry_state.outcome, 'retry_variable', litellm.retry)) # retry call, turn this off by setting `litellm.retry = False`
 @timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
 def completion(
     model, messages, # required params
@@ -255,8 +255,8 @@ def completion(
     elif model in litellm.cohere_models:
         if api_key:
             cohere_key = api_key
-        elif litellm.api_key:
-            cohere_key = litellm.api_key
+        elif litellm.cohere_key:
+            cohere_key = litellm.cohere_key
         else:
             cohere_key = os.environ.get("COHERE_API_KEY")
         co = cohere.Client(cohere_key)
@@ -330,6 +330,7 @@ def embedding(model, input=[], azure=False, force_timeout=60, logger_fn=None):
         logging(model=model, input=input, azure=azure, logger_fn=logger_fn, exception=e)
         ## Map to OpenAI Exception
         raise exception_type(model=model, original_exception=e)
+        raise e
 ####### HELPER FUNCTIONS ################
 ## Set verbose to true -> ```litellm.set_verbose = True```
 def print_verbose(print_statement):
diff --git a/litellm/tests/__pycache__/test_exceptions.cpython-311-pytest-7.4.0.pyc b/litellm/tests/__pycache__/test_exceptions.cpython-311-pytest-7.4.0.pyc
index 50f8b39d3..62f9422f8 100644
Binary files a/litellm/tests/__pycache__/test_exceptions.cpython-311-pytest-7.4.0.pyc and b/litellm/tests/__pycache__/test_exceptions.cpython-311-pytest-7.4.0.pyc differ
diff --git a/litellm/tests/test_exceptions.py b/litellm/tests/test_exceptions.py
index 5472168f9..6a6902449 100644
--- a/litellm/tests/test_exceptions.py
+++ b/litellm/tests/test_exceptions.py
@@ -8,6 +8,7 @@ from litellm import embedding, completion
 from concurrent.futures import ThreadPoolExecutor
 import pytest
+# litellm.set_verbose = True
 
 #### What this tests ####
 # This tests exception mapping -> trigger an exception from an llm provider -> assert if output is of the expected type
@@ -19,29 +20,32 @@ import pytest
 # Approach: Run each model through the test -> assert if the correct error (always the same one) is triggered
 
 # models = ["gpt-3.5-turbo", "chatgpt-test", "claude-instant-1", "command-nightly"]
-
-# # Test 1: Context Window Errors
-# @pytest.mark.parametrize("model", models)
-# def test_context_window(model):
-#     sample_text = "how does a court case get to the Supreme Court?" * 100000
-#     messages = [{"content": sample_text, "role": "user"}]
-#     try:
-#         azure = model == "chatgpt-test"
-#         print(f"model: {model}")
-#         response = completion(model=model, messages=messages, azure=azure)
-#     except InvalidRequestError:
-#         print("InvalidRequestError")
-#         return
-#     except OpenAIError:
-#         print("OpenAIError")
-#         return
-#     except Exception as e:
-#         print("Uncaught Error in test_context_window")
-#         # print(f"Error Type: {type(e).__name__}")
-#         print(f"Uncaught Exception - {e}")
-#         pytest.fail(f"Error occurred: {e}")
-#     return
-
+models = ["command-nightly"]
+def logging_fn(model_call_dict):
+    print(f"model_call_dict: {model_call_dict['model']}")
+# Test 1: Context Window Errors
+@pytest.mark.parametrize("model", models)
+def test_context_window(model):
+    sample_text = "how does a court case get to the Supreme Court?" * 100000
+    messages = [{"content": sample_text, "role": "user"}]
+    try:
+        azure = model == "chatgpt-test"
+        print(f"model: {model}")
+        response = completion(model=model, messages=messages, azure=azure, logger_fn=logging_fn)
+        print(f"response: {response}")
+    except InvalidRequestError:
+        print("InvalidRequestError")
+        return
+    except OpenAIError:
+        print("OpenAIError")
+        return
+    except Exception as e:
+        print("Uncaught Error in test_context_window")
+        # print(f"Error Type: {type(e).__name__}")
+        print(f"Uncaught Exception - {e}")
+        pytest.fail(f"Error occurred: {e}")
+    return
+test_context_window("command-nightly")
 # # Test 2: InvalidAuth Errors
 # def logger_fn(model_call_object: dict):
 #     print(f"model call details: {model_call_object}")
@@ -64,7 +68,7 @@ import pytest
 #         os.environ["REPLICATE_API_KEY"] = "bad-key"
 #         os.environ["REPLICATE_API_TOKEN"] = "bad-key"
 #         print(f"model: {model}")
-#         response = completion(model=model, messages=messages, azure=azure, logger_fn=logger_fn)
+#         response = completion(model=model, messages=messages, azure=azure)
 #         print(f"response: {response}")
 #     except AuthenticationError as e:
 #         return
@@ -75,7 +79,6 @@ import pytest
 #         pytest.fail(f"Error occurred: {e}")
 #     return
 
-
 # # Test 3: Rate Limit Errors
 # def test_model(model):
 #     try:
diff --git a/setup.py b/setup.py
index 492b6e86a..9f54d6452 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 
 setup(
     name='litellm',
-    version='0.1.230',
+    version='0.1.231',
     description='Library to easily interface with LLM API providers',
     author='BerriAI',
     packages=[