From 26e68c3f673ae59c450273a3616895f15da2c4fd Mon Sep 17 00:00:00 2001 From: David Manouchehri Date: Mon, 5 Feb 2024 15:12:42 -0500 Subject: [PATCH 01/33] (feat) Add sessionId for Langfuse. --- litellm/integrations/langfuse.py | 1 + 1 file changed, 1 insertion(+) diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py index cd37a93a3..e62dccdc4 100644 --- a/litellm/integrations/langfuse.py +++ b/litellm/integrations/langfuse.py @@ -214,6 +214,7 @@ class LangFuseLogger: "output": output, "user_id": metadata.get("trace_user_id", user_id), "id": metadata.get("trace_id", None), + "session_id": metadata.get("session_id", None), } cost = kwargs["response_cost"] print_verbose(f"trace: {cost}") From c77deb12b3edcd0f1bf405aa8c5268a9a5a1d811 Mon Sep 17 00:00:00 2001 From: David Manouchehri Date: Mon, 5 Feb 2024 15:21:05 -0500 Subject: [PATCH 02/33] (docs) Add session_id to Langfuse doc --- docs/my-website/docs/observability/langfuse_integration.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/my-website/docs/observability/langfuse_integration.md b/docs/my-website/docs/observability/langfuse_integration.md index 3de426ec3..ec8f3a029 100644 --- a/docs/my-website/docs/observability/langfuse_integration.md +++ b/docs/my-website/docs/observability/langfuse_integration.md @@ -122,6 +122,7 @@ response = completion( "generation_id": "gen-id22", # set langfuse Generation ID "trace_id": "trace-id22", # set langfuse Trace ID "trace_user_id": "user-id2", # set langfuse Trace User ID + "session_id": "session-1", # set langfuse Session ID }, ) From 9cbc412c78e3d2bb549c3714343bb5391e545da4 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Mon, 5 Feb 2024 14:36:07 -0800 Subject: [PATCH 03/33] (fix) fix backup.json --- ...odel_prices_and_context_window_backup.json | 1164 +++++++++++++++-- 1 file changed, 1069 insertions(+), 95 deletions(-) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 454b2504a..b6ded001c 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -1,6 +1,8 @@ { "gpt-4": { - "max_tokens": 8192, + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, "litellm_provider": "openai", @@ -8,6 +10,8 @@ }, "gpt-4-0314": { "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, "litellm_provider": "openai", @@ -15,6 +19,8 @@ }, "gpt-4-0613": { "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, "litellm_provider": "openai", @@ -22,6 +28,8 @@ }, "gpt-4-32k": { "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "openai", @@ -29,6 +37,8 @@ }, "gpt-4-32k-0314": { "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "openai", @@ -36,6 +46,8 @@ }, "gpt-4-32k-0613": { "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "openai", @@ -43,6 +55,17 @@ }, "gpt-4-1106-preview": { "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "openai", + "mode": "chat" + }, + "gpt-4-0125-preview": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, "litellm_provider": "openai", @@ -50,6 +73,17 @@ }, "gpt-4-vision-preview": { "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "openai", + "mode": "chat" + }, + "gpt-4-1106-vision-preview": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, "litellm_provider": "openai", @@ -57,6 +91,8 @@ }, "gpt-3.5-turbo": { "max_tokens": 4097, + "max_input_tokens": 4097, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, "litellm_provider": "openai", @@ -64,6 +100,8 @@ }, "gpt-3.5-turbo-0301": { "max_tokens": 4097, + "max_input_tokens": 4097, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, "litellm_provider": "openai", @@ -71,6 +109,8 @@ }, "gpt-3.5-turbo-0613": { "max_tokens": 4097, + "max_input_tokens": 4097, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, "litellm_provider": "openai", @@ -78,13 +118,26 @@ }, "gpt-3.5-turbo-1106": { "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000010, "output_cost_per_token": 0.0000020, "litellm_provider": "openai", "mode": "chat" }, + "gpt-3.5-turbo-0125": { + "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000015, + "litellm_provider": "openai", + "mode": "chat" + }, "gpt-3.5-turbo-16k": { "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, "litellm_provider": "openai", @@ -92,6 +145,8 @@ }, "gpt-3.5-turbo-16k-0613": { "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, "litellm_provider": "openai", @@ -99,11 +154,27 @@ }, "ft:gpt-3.5-turbo": { "max_tokens": 4097, + "max_input_tokens": 4097, + "max_output_tokens": 4096, "input_cost_per_token": 0.000012, "output_cost_per_token": 0.000016, "litellm_provider": "openai", "mode": "chat" }, + "text-embedding-3-large": { + "max_tokens": 8191, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.000000, + "litellm_provider": "openai", + "mode": "embedding" + }, + "text-embedding-3-small": { + "max_tokens": 8191, + "input_cost_per_token": 0.00000002, + "output_cost_per_token": 0.000000, + "litellm_provider": "openai", + "mode": "embedding" + }, "text-embedding-ada-002": { "max_tokens": 8191, "input_cost_per_token": 0.0000001, @@ -111,41 +182,173 @@ "litellm_provider": "openai", "mode": "embedding" }, + "text-embedding-ada-002-v2": { + "max_tokens": 8191, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "openai", + "mode": "embedding" + }, + "256-x-256/dall-e-2": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000024414, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "512-x-512/dall-e-2": { + "mode": "image_generation", + "input_cost_per_pixel": 0.0000000686, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "1024-x-1024/dall-e-2": { + "mode": "image_generation", + "input_cost_per_pixel": 0.000000019, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "hd/1024-x-1792/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000006539, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "hd/1792-x-1024/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000006539, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "hd/1024-x-1024/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000007629, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "standard/1024-x-1792/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000004359, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "standard/1792-x-1024/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000004359, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "standard/1024-x-1024/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.0000000381469, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, "azure/gpt-4-1106-preview": { "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, "litellm_provider": "azure", "mode": "chat" }, - "azure/gpt-4-32k": { + "azure/gpt-4-0613": { "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-4-32k-0613": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00006, + "output_cost_per_token": 0.00012, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-4-32k": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "azure", "mode": "chat" }, "azure/gpt-4": { - "max_tokens": 16385, + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, "litellm_provider": "azure", "mode": "chat" }, - "azure/gpt-3.5-turbo-16k": { + "azure/gpt-4-turbo": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-4-turbo-vision-preview": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-35-turbo-16k-0613": { "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, "litellm_provider": "azure", "mode": "chat" }, - "azure/gpt-3.5-turbo": { - "max_tokens": 4097, + "azure/gpt-35-turbo-1106": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, "litellm_provider": "azure", "mode": "chat" }, + "azure/gpt-35-turbo-16k": { + "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000004, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-35-turbo": { + "max_tokens": 4097, + "max_input_tokens": 4097, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/ada": { + "max_tokens": 8191, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "azure", + "mode": "embedding" + }, "azure/text-embedding-ada-002": { "max_tokens": 8191, "input_cost_per_token": 0.0000001, @@ -153,36 +356,52 @@ "litellm_provider": "azure", "mode": "embedding" }, - "text-davinci-003": { - "max_tokens": 4097, - "input_cost_per_token": 0.000002, - "output_cost_per_token": 0.000002, - "litellm_provider": "text-completion-openai", - "mode": "completion" + "azure/standard/1024-x-1024/dall-e-3": { + "input_cost_per_pixel": 0.0000000381469, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" }, - "text-curie-001": { - "max_tokens": 2049, - "input_cost_per_token": 0.000002, - "output_cost_per_token": 0.000002, - "litellm_provider": "text-completion-openai", - "mode": "completion" + "azure/hd/1024-x-1024/dall-e-3": { + "input_cost_per_pixel": 0.00000007629, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" }, - "text-babbage-001": { - "max_tokens": 2049, - "input_cost_per_token": 0.0000004, - "output_cost_per_token": 0.0000004, - "litellm_provider": "text-completion-openai", - "mode": "completion" + "azure/standard/1024-x-1792/dall-e-3": { + "input_cost_per_pixel": 0.00000004359, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" }, - "text-ada-001": { - "max_tokens": 2049, - "input_cost_per_token": 0.0000004, - "output_cost_per_token": 0.0000004, - "litellm_provider": "text-completion-openai", - "mode": "completion" + "azure/standard/1792-x-1024/dall-e-3": { + "input_cost_per_pixel": 0.00000004359, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" + }, + "azure/hd/1024-x-1792/dall-e-3": { + "input_cost_per_pixel": 0.00000006539, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" + }, + "azure/hd/1792-x-1024/dall-e-3": { + "input_cost_per_pixel": 0.00000006539, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" + }, + "azure/standard/1024-x-1024/dall-e-2": { + "input_cost_per_pixel": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" }, "babbage-002": { "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000004, "output_cost_per_token": 0.0000004, "litellm_provider": "text-completion-openai", @@ -190,6 +409,8 @@ }, "davinci-002": { "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002, "litellm_provider": "text-completion-openai", @@ -197,6 +418,8 @@ }, "gpt-3.5-turbo-instruct": { "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, "litellm_provider": "text-completion-openai", @@ -210,6 +433,33 @@ "litellm_provider": "anthropic", "mode": "chat" }, + "mistral/mistral-tiny": { + "max_tokens": 8192, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000046, + "litellm_provider": "mistral", + "mode": "chat" + }, + "mistral/mistral-small": { + "max_tokens": 8192, + "input_cost_per_token": 0.00000066, + "output_cost_per_token": 0.00000197, + "litellm_provider": "mistral", + "mode": "chat" + }, + "mistral/mistral-medium": { + "max_tokens": 8192, + "input_cost_per_token": 0.00000273, + "output_cost_per_token": 0.00000820, + "litellm_provider": "mistral", + "mode": "chat" + }, + "mistral/mistral-embed": { + "max_tokens": 8192, + "input_cost_per_token": 0.000000111, + "litellm_provider": "mistral", + "mode": "embedding" + }, "claude-instant-1.2": { "max_tokens": 100000, "max_output_tokens": 8191, @@ -248,6 +498,20 @@ "litellm_provider": "vertex_ai-text-models", "mode": "completion" }, + "text-unicorn": { + "max_tokens": 8192, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.000028, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion" + }, + "text-unicorn@001": { + "max_tokens": 8192, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.000028, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion" + }, "chat-bison": { "max_tokens": 4096, "input_cost_per_token": 0.000000125, @@ -262,6 +526,13 @@ "litellm_provider": "vertex_ai-chat-models", "mode": "chat" }, + "chat-bison@002": { + "max_tokens": 4096, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-chat-models", + "mode": "chat" + }, "chat-bison-32k": { "max_tokens": 32000, "input_cost_per_token": 0.000000125, @@ -287,14 +558,21 @@ "max_tokens": 2048, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "litellm_provider": "vertex_ai-chat-models", + "litellm_provider": "vertex_ai-code-text-models", "mode": "completion" }, - "code-gecko@latest": { + "code-gecko@002": { "max_tokens": 2048, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "litellm_provider": "vertex_ai-chat-models", + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion" + }, + "code-gecko": { + "max_tokens": 2048, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-code-text-models", "mode": "completion" }, "codechat-bison": { @@ -318,6 +596,67 @@ "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat" }, + "gemini-pro": { + "max_tokens": 30720, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.0000005, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat" + }, + "gemini-pro-vision": { + "max_tokens": 30720, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.0000005, + "litellm_provider": "vertex_ai-vision-models", + "mode": "chat" + }, + "textembedding-gecko": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding" + }, + "textembedding-gecko-multilingual": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding" + }, + "textembedding-gecko-multilingual@001": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding" + }, + "textembedding-gecko@001": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding" + }, + "textembedding-gecko@003": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding" + }, "palm/chat-bison": { "max_tokens": 4096, "input_cost_per_token": 0.000000125, @@ -360,6 +699,22 @@ "litellm_provider": "palm", "mode": "completion" }, + "gemini/gemini-pro": { + "max_tokens": 30720, + "max_output_tokens": 2048, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "gemini", + "mode": "chat" + }, + "gemini/gemini-pro-vision": { + "max_tokens": 30720, + "max_output_tokens": 2048, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "gemini", + "mode": "chat" + }, "command-nightly": { "max_tokens": 4096, "input_cost_per_token": 0.000015, @@ -628,6 +983,14 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "amazon.titan-embed-text-v1": { + "max_tokens": 8192, + "output_vector_size": 1536, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0, + "litellm_provider": "bedrock", + "mode": "embedding" + }, "anthropic.claude-v1": { "max_tokens": 100000, "max_output_tokens": 8191, @@ -636,6 +999,102 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "bedrock/us-east-1/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0455, + "output_cost_per_second": 0.0455, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02527, + "output_cost_per_second": 0.02527, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0415, + "output_cost_per_second": 0.0415, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02305, + "output_cost_per_second": 0.02305, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, "anthropic.claude-v2": { "max_tokens": 100000, "max_output_tokens": 8191, @@ -644,6 +1103,102 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "bedrock/us-east-1/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0455, + "output_cost_per_second": 0.0455, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02527, + "output_cost_per_second": 0.02527, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0415, + "output_cost_per_second": 0.0415, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02305, + "output_cost_per_second": 0.02305, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, "anthropic.claude-v2:1": { "max_tokens": 200000, "max_output_tokens": 8191, @@ -652,6 +1207,102 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "bedrock/us-east-1/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0455, + "output_cost_per_second": 0.0455, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02527, + "output_cost_per_second": 0.02527, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0415, + "output_cost_per_second": 0.0415, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02305, + "output_cost_per_second": 0.02305, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, "anthropic.claude-instant-v1": { "max_tokens": 100000, "max_output_tokens": 8191, @@ -660,6 +1311,102 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "bedrock/us-east-1/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.0000008, + "output_cost_per_token": 0.0000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.011, + "output_cost_per_second": 0.011, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00611, + "output_cost_per_second": 0.00611, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.011, + "output_cost_per_second": 0.011, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00611, + "output_cost_per_second": 0.00611, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.0000008, + "output_cost_per_token": 0.0000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000223, + "output_cost_per_token": 0.00000755, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.01475, + "output_cost_per_second": 0.01475, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.008194, + "output_cost_per_second": 0.008194, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000248, + "output_cost_per_token": 0.00000838, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.01635, + "output_cost_per_second": 0.01635, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.009083, + "output_cost_per_second": 0.009083, + "litellm_provider": "bedrock", + "mode": "chat" + }, "cohere.command-text-v14": { "max_tokens": 4096, "input_cost_per_token": 0.0000015, @@ -667,6 +1414,55 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "bedrock/*/1-month-commitment/cohere.command-text-v14": { + "max_tokens": 4096, + "input_cost_per_second": 0.011, + "output_cost_per_second": 0.011, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/*/6-month-commitment/cohere.command-text-v14": { + "max_tokens": 4096, + "input_cost_per_second": 0.0066027, + "output_cost_per_second": 0.0066027, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "cohere.command-light-text-v14": { + "max_tokens": 4000, + "input_cost_per_token": 0.0000003, + "output_cost_per_token": 0.0000006, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/*/1-month-commitment/cohere.command-light-text-v14": { + "max_tokens": 4096, + "input_cost_per_second": 0.001902, + "output_cost_per_second": 0.001902, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/*/6-month-commitment/cohere.command-light-text-v14": { + "max_tokens": 4096, + "input_cost_per_second": 0.0011416, + "output_cost_per_second": 0.0011416, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "cohere.embed-english-v3": { + "max_tokens": 512, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "bedrock", + "mode": "embedding" + }, + "cohere.embed-multilingual-v3": { + "max_tokens": 512, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "bedrock", + "mode": "embedding" + }, "meta.llama2-13b-chat-v1": { "max_tokens": 4096, "input_cost_per_token": 0.00000075, @@ -681,6 +1477,48 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "512-x-512/50-steps/stability.stable-diffusion-xl-v0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.018, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "512-x-512/max-steps/stability.stable-diffusion-xl-v0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.036, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "max-x-max/50-steps/stability.stable-diffusion-xl-v0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.036, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "max-x-max/max-steps/stability.stable-diffusion-xl-v0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.072, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "1024-x-1024/50-steps/stability.stable-diffusion-xl-v1": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.04, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "1024-x-1024/max-steps/stability.stable-diffusion-xl-v1": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.08, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, "sagemaker/meta-textgeneration-llama-2-7b": { "max_tokens": 4096, "input_cost_per_token": 0.000, @@ -805,104 +1643,197 @@ "litellm_provider": "ollama", "mode": "completion" }, + "deepinfra/lizpreciatior/lzlv_70b_fp16_hf": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000090, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/Gryphe/MythoMax-L2-13b": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000022, + "output_cost_per_token": 0.00000022, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/mistralai/Mistral-7B-Instruct-v0.1": { + "max_tokens": 32768, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", + "mode": "chat" + }, "deepinfra/meta-llama/Llama-2-70b-chat-hf": { "max_tokens": 4096, - "input_cost_per_token": 0.000000700, - "output_cost_per_token": 0.000000950, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000090, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/cognitivecomputations/dolphin-2.6-mixtral-8x7b": { + "max_tokens": 32768, + "input_cost_per_token": 0.00000027, + "output_cost_per_token": 0.00000027, "litellm_provider": "deepinfra", "mode": "chat" }, "deepinfra/codellama/CodeLlama-34b-Instruct-hf": { "max_tokens": 4096, - "input_cost_per_token": 0.0000006, - "output_cost_per_token": 0.0000006, + "input_cost_per_token": 0.00000060, + "output_cost_per_token": 0.00000060, "litellm_provider": "deepinfra", "mode": "chat" - }, - "deepinfra/meta-llama/Llama-2-13b-chat-hf": { + }, + "deepinfra/deepinfra/mixtral": { "max_tokens": 4096, - "input_cost_per_token": 0.00000035, - "output_cost_per_token": 0.00000035, + "input_cost_per_token": 0.00000027, + "output_cost_per_token": 0.00000027, "litellm_provider": "deepinfra", - "mode": "chat" - }, - "deepinfra/meta-llama/Llama-2-7b-chat-hf": { + "mode": "completion" + }, + "deepinfra/Phind/Phind-CodeLlama-34B-v2": { "max_tokens": 4096, - "input_cost_per_token": 0.0000002, - "output_cost_per_token": 0.0000002, + "input_cost_per_token": 0.00000060, + "output_cost_per_token": 0.00000060, "litellm_provider": "deepinfra", "mode": "chat" - }, - "deepinfra/mistralai/Mistral-7B-Instruct-v0.1": { + }, + "deepinfra/mistralai/Mixtral-8x7B-Instruct-v0.1": { + "max_tokens": 32768, + "input_cost_per_token": 0.00000027, + "output_cost_per_token": 0.00000027, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/deepinfra/airoboros-70b": { "max_tokens": 4096, - "input_cost_per_token": 0.0000002, - "output_cost_per_token": 0.0000002, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000090, "litellm_provider": "deepinfra", "mode": "chat" - }, - "deepinfra/jondurbin/airoboros-l2-70b-gpt4-1.4.1": { + }, + "deepinfra/01-ai/Yi-34B-Chat": { "max_tokens": 4096, - "input_cost_per_token": 0.0000007, - "output_cost_per_token": 0.00000095, + "input_cost_per_token": 0.00000060, + "output_cost_per_token": 0.00000060, "litellm_provider": "deepinfra", "mode": "chat" - }, - "perplexity/pplx-7b-chat": { + }, + "deepinfra/01-ai/Yi-6B-200K": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", + "mode": "completion" + }, + "deepinfra/jondurbin/airoboros-l2-70b-gpt4-1.4.1": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000090, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/meta-llama/Llama-2-13b-chat-hf": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000022, + "output_cost_per_token": 0.00000022, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/amazon/MistralLite": { + "max_tokens": 32768, + "input_cost_per_token": 0.00000020, + "output_cost_per_token": 0.00000020, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/meta-llama/Llama-2-7b-chat-hf": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/01-ai/Yi-34B-200K": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000060, + "output_cost_per_token": 0.00000060, + "litellm_provider": "deepinfra", + "mode": "completion" + }, + "deepinfra/openchat/openchat_3.5": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "perplexity/codellama-34b-instruct": { + "max_tokens": 16384, + "input_cost_per_token": 0.00000035, + "output_cost_per_token": 0.00000140, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/codellama-70b-instruct": { + "max_tokens": 16384, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000280, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/pplx-7b-chat": { "max_tokens": 8192, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, + "input_cost_per_token": 0.00000007, + "output_cost_per_token": 0.00000028, "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/pplx-70b-chat": { + }, + "perplexity/pplx-70b-chat": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000280, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/pplx-7b-online": { "max_tokens": 4096, "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, + "output_cost_per_token": 0.00000028, + "input_cost_per_request": 0.005, "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/pplx-7b-online": { + }, + "perplexity/pplx-70b-online": { "max_tokens": 4096, "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.0005, + "output_cost_per_token": 0.00000280, + "input_cost_per_request": 0.005, "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/pplx-70b-online": { + }, + "perplexity/llama-2-70b-chat": { "max_tokens": 4096, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.0005, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000280, "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/llama-2-13b-chat": { + }, + "perplexity/mistral-7b-instruct": { "max_tokens": 4096, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, + "input_cost_per_token": 0.00000007, + "output_cost_per_token": 0.00000028, "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/llama-2-70b-chat": { - "max_tokens": 4096, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, - "litellm_provider": "perplexity", - "mode": "chat" - }, - "perplexity/mistral-7b-instruct": { - "max_tokens": 4096, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, - "litellm_provider": "perplexity", - "mode": "chat" - }, - "perplexity/replit-code-v1.5-3b": { - "max_tokens": 4096, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, - "litellm_provider": "perplexity", - "mode": "chat" - }, + }, + "perplexity/mixtral-8x7b-instruct": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000007, + "output_cost_per_token": 0.00000028, + "litellm_provider": "perplexity", + "mode": "chat" + }, "anyscale/mistralai/Mistral-7B-Instruct-v0.1": { "max_tokens": 16384, "input_cost_per_token": 0.00000015, @@ -944,5 +1875,48 @@ "output_cost_per_token": 0.000001, "litellm_provider": "anyscale", "mode": "chat" - } + }, + "cloudflare/@cf/meta/llama-2-7b-chat-fp16": { + "max_tokens": 3072, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", + "mode": "chat" + }, + "cloudflare/@cf/meta/llama-2-7b-chat-int8": { + "max_tokens": 2048, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", + "mode": "chat" + }, + "cloudflare/@cf/mistral/mistral-7b-instruct-v0.1": { + "max_tokens": 8192, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", + "mode": "chat" + }, + "cloudflare/@hf/thebloke/codellama-7b-instruct-awq": { + "max_tokens": 4096, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", + "mode": "chat" + }, + "voyage/voyage-01": { + "max_tokens": 4096, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", + "mode": "embedding" + }, + "voyage/voyage-lite-01": { + "max_tokens": 4096, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", + "mode": "embedding" + } + } From a6836a0996396853e142998df2dc210debacf687 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Mon, 5 Feb 2024 14:42:10 -0800 Subject: [PATCH 04/33] (feat) pre-commit hook to validate --- .pre-commit-config.yaml | 9 ++++++++- ci_cd/check_files_match.py | 25 +++++++++++++++++++++++++ model_prices_and_context_window.json | 2 +- 3 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 ci_cd/check_files_match.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8ab4e3e92..95f19f222 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,4 +17,11 @@ repos: entry: python3 -m mypy --ignore-missing-imports language: system types: [python] - files: ^litellm/ \ No newline at end of file + files: ^litellm/ +- repo: local + hooks: + - id: check-files-match + name: Check if files match + entry: python3 ci_cd/check_files_match.py + language: system + files: model_prices_and_context_window.json model_prices_and_context_window_backup.json \ No newline at end of file diff --git a/ci_cd/check_files_match.py b/ci_cd/check_files_match.py new file mode 100644 index 000000000..0b5a0fab1 --- /dev/null +++ b/ci_cd/check_files_match.py @@ -0,0 +1,25 @@ +import sys +import filecmp + + +def main(argv=None): + if argv is None: + argv = sys.argv[1:] + + if len(argv) != 2: + print("Usage: python check_files_match.py ") + return 1 + + file1 = argv[0] + file2 = argv[1] + + if filecmp.cmp(file1, file2, shallow=False): + print(f"Files {file1} and {file2} match.") + return 0 + else: + print(f"Files {file1} and {file2} do not match.") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index b6ded001c..6087eccd9 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -5,7 +5,7 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, - "litellm_provider": "openai", + "litellm_provider": "o", "mode": "chat" }, "gpt-4-0314": { From 8b571159fcd2bd25c2f9a7e75b3ab58fa20dcd24 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Mon, 5 Feb 2024 15:00:13 -0800 Subject: [PATCH 05/33] (feat) add pre-commit hook to check model_prices_and_context_window.json litellm/model_prices_and_context_window_backup.json --- .pre-commit-config.yaml | 15 +++++------ ci_cd/check_files_match.py | 38 +++++++++++++++++++--------- model_prices_and_context_window.json | 2 +- 3 files changed, 34 insertions(+), 21 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 95f19f222..8978e0d1a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,6 +10,12 @@ repos: exclude: ^litellm/tests/|^litellm/proxy/proxy_cli.py|^litellm/integrations/|^litellm/proxy/tests/ additional_dependencies: [flake8-print] files: litellm/.*\.py +- repo: local + hooks: + - id: check-files-match + name: Check if files match + entry: python3 ci_cd/check_files_match.py + language: system - repo: local hooks: - id: mypy @@ -17,11 +23,4 @@ repos: entry: python3 -m mypy --ignore-missing-imports language: system types: [python] - files: ^litellm/ -- repo: local - hooks: - - id: check-files-match - name: Check if files match - entry: python3 ci_cd/check_files_match.py - language: system - files: model_prices_and_context_window.json model_prices_and_context_window_backup.json \ No newline at end of file + files: ^litellm/ \ No newline at end of file diff --git a/ci_cd/check_files_match.py b/ci_cd/check_files_match.py index 0b5a0fab1..a9f081ce8 100644 --- a/ci_cd/check_files_match.py +++ b/ci_cd/check_files_match.py @@ -1,23 +1,37 @@ import sys import filecmp +import difflib + + +def show_diff(file1, file2): + with open(file1, "r") as f1, open(file2, "r") as f2: + lines1 = f1.readlines() + lines2 = f2.readlines() + + diff = difflib.unified_diff(lines1, lines2, lineterm="") + + for line in diff: + print(line) def main(argv=None): - if argv is None: - argv = sys.argv[1:] + print( + "comparing model_prices_and_context_window, and litellm/model_prices_and_context_window_backup.json files.......... checking they match", + argv, + ) - if len(argv) != 2: - print("Usage: python check_files_match.py ") - return 1 - - file1 = argv[0] - file2 = argv[1] - - if filecmp.cmp(file1, file2, shallow=False): - print(f"Files {file1} and {file2} match.") + file1 = "model_prices_and_context_window.json" + file2 = "litellm/model_prices_and_context_window_backup.json" + cmp_result = filecmp.cmp(file1, file2, shallow=False) + if cmp_result: + print(f"Passed ! Files {file1} and {file2} match.") return 0 else: - print(f"Files {file1} and {file2} do not match.") + # show the diff + print(f"Failed ! Files {file1} and {file2} do not match.") + print("\nDiff") + show_diff(file1, file2) + return 1 diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 6087eccd9..b6ded001c 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -5,7 +5,7 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, - "litellm_provider": "o", + "litellm_provider": "openai", "mode": "chat" }, "gpt-4-0314": { From 70f36073dc0f50768d9bfdd71b5a35f163588c6f Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Mon, 5 Feb 2024 15:03:04 -0800 Subject: [PATCH 06/33] (fix) pre commit hook to sync backup context_window mapping --- ci_cd/check_files_match.py | 33 ++++++++----------- ...odel_prices_and_context_window_backup.json | 2 +- model_prices_and_context_window.json | 2 +- 3 files changed, 15 insertions(+), 22 deletions(-) diff --git a/ci_cd/check_files_match.py b/ci_cd/check_files_match.py index a9f081ce8..18b6cf792 100644 --- a/ci_cd/check_files_match.py +++ b/ci_cd/check_files_match.py @@ -1,39 +1,32 @@ import sys import filecmp -import difflib - - -def show_diff(file1, file2): - with open(file1, "r") as f1, open(file2, "r") as f2: - lines1 = f1.readlines() - lines2 = f2.readlines() - - diff = difflib.unified_diff(lines1, lines2, lineterm="") - - for line in diff: - print(line) +import shutil def main(argv=None): print( - "comparing model_prices_and_context_window, and litellm/model_prices_and_context_window_backup.json files.......... checking they match", - argv, + "Comparing model_prices_and_context_window and litellm/model_prices_and_context_window_backup.json files... checking if they match." ) file1 = "model_prices_and_context_window.json" file2 = "litellm/model_prices_and_context_window_backup.json" + cmp_result = filecmp.cmp(file1, file2, shallow=False) + if cmp_result: - print(f"Passed ! Files {file1} and {file2} match.") + print(f"Passed! Files {file1} and {file2} match.") return 0 else: - # show the diff - print(f"Failed ! Files {file1} and {file2} do not match.") - print("\nDiff") - show_diff(file1, file2) - + print( + f"Failed! Files {file1} and {file2} do not match. Copying content from {file1} to {file2}." + ) + copy_content(file1, file2) return 1 +def copy_content(source, destination): + shutil.copy2(source, destination) + + if __name__ == "__main__": sys.exit(main()) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index b6ded001c..3d94bd915 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -5,7 +5,7 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, - "litellm_provider": "openai", + "litellm_provider": "opeai", "mode": "chat" }, "gpt-4-0314": { diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index b6ded001c..3d94bd915 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -5,7 +5,7 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, - "litellm_provider": "openai", + "litellm_provider": "opeai", "mode": "chat" }, "gpt-4-0314": { From 7557a2535af37b0a5a50038a236c213cd03deaf6 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Mon, 5 Feb 2024 15:04:39 -0800 Subject: [PATCH 07/33] (fix) model_prices --- litellm/model_prices_and_context_window_backup.json | 2 +- model_prices_and_context_window.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 3d94bd915..b6ded001c 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -5,7 +5,7 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, - "litellm_provider": "opeai", + "litellm_provider": "openai", "mode": "chat" }, "gpt-4-0314": { diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 3d94bd915..b6ded001c 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -5,7 +5,7 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, - "litellm_provider": "opeai", + "litellm_provider": "openai", "mode": "chat" }, "gpt-4-0314": { From 006b5efef0871b9e786d5141a1c69378a3408de8 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Mon, 5 Feb 2024 15:10:59 -0800 Subject: [PATCH 08/33] =?UTF-8?q?bump:=20version=201.22.4=20=E2=86=92=201.?= =?UTF-8?q?22.5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 256624417..381ae931e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.22.4" +version = "1.22.5" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -69,7 +69,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.22.4" +version = "1.22.5" version_files = [ "pyproject.toml:^version" ] From 77fe71ee08515bd391a17737449df314a0926a18 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 5 Feb 2024 15:30:30 -0800 Subject: [PATCH 09/33] fix(utils.py): support together ai function calling --- .gitignore | 1 + litellm/main.py | 1 + litellm/proxy/proxy_server.py | 10 ++++++---- litellm/utils.py | 13 ++++++++----- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 730898a5b..00cd35c5b 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,4 @@ ui/litellm-dashboard/package-lock.json deploy/charts/litellm-helm/*.tgz deploy/charts/litellm-helm/charts/* deploy/charts/*.tgz +litellm/proxy/vertex_key.json diff --git a/litellm/main.py b/litellm/main.py index bc33a69e5..66af60576 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -263,6 +263,7 @@ async def acompletion( or custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat" or custom_llm_provider == "vertex_ai" + or custom_llm_provider in litellm.openai_compatible_providers ): # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all. init_response = await loop.run_in_executor(None, func_with_context) if isinstance(init_response, dict) or isinstance( diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 0501ec746..7461b59b0 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -758,9 +758,10 @@ async def _PROXY_track_cost_callback( verbose_proxy_logger.info( f"response_cost {response_cost}, for user_id {user_id}" ) - if user_api_key and ( - prisma_client is not None or custom_db_client is not None - ): + verbose_proxy_logger.debug( + f"user_api_key {user_api_key}, prisma_client: {prisma_client}, custom_db_client: {custom_db_client}" + ) + if user_api_key is not None: await update_database( token=user_api_key, response_cost=response_cost, @@ -770,6 +771,8 @@ async def _PROXY_track_cost_callback( start_time=start_time, end_time=end_time, ) + else: + raise Exception("User API key missing from custom callback.") else: if kwargs["stream"] != True or ( kwargs["stream"] == True @@ -4067,7 +4070,6 @@ def _has_user_setup_sso(): async def shutdown_event(): global prisma_client, master_key, user_custom_auth, user_custom_key_generate if prisma_client: - verbose_proxy_logger.debug("Disconnecting from Prisma") await prisma_client.disconnect() diff --git a/litellm/utils.py b/litellm/utils.py index 8df027b87..e56ba879f 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -3852,6 +3852,8 @@ def get_optional_params( and custom_llm_provider != "text-completion-openai" and custom_llm_provider != "azure" and custom_llm_provider != "vertex_ai" + and custom_llm_provider != "anyscale" + and custom_llm_provider != "together_ai" ): if custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat": # ollama actually supports json output @@ -3870,11 +3872,6 @@ def get_optional_params( optional_params[ "functions_unsupported_model" ] = non_default_params.pop("functions") - elif ( - custom_llm_provider == "anyscale" - and model == "mistralai/Mistral-7B-Instruct-v0.1" - ): # anyscale just supports function calling with mistral - pass elif ( litellm.add_function_to_prompt ): # if user opts to add it to prompt instead @@ -4087,6 +4084,8 @@ def get_optional_params( "top_p", "stop", "frequency_penalty", + "tools", + "tool_choice", ] _check_valid_arg(supported_params=supported_params) @@ -4104,6 +4103,10 @@ def get_optional_params( ] = frequency_penalty # https://docs.together.ai/reference/inference if stop is not None: optional_params["stop"] = stop + if tools is not None: + optional_params["tools"] = tools + if tool_choice is not None: + optional_params["tool_choice"] = tool_choice elif custom_llm_provider == "ai21": ## check if unsupported param passed in supported_params = [ From 8d7698f24d537060a1f632cd8387cb2c2c9d8d51 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Mon, 5 Feb 2024 16:10:49 -0800 Subject: [PATCH 10/33] (fix) litellm-ui keys can never access /chat/completions --- litellm/proxy/proxy_server.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 7461b59b0..781c676c8 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -636,6 +636,31 @@ async def user_api_key_auth( raise Exception( f"Only master key can be used to generate, delete, update or get info for new keys/users. Value of allow_user_auth={allow_user_auth}" ) + + # check if token is from litellm-ui, litellm ui makes keys to allow users to login with sso. These keys can only be used for LiteLLM UI functions + # sso/login, ui/login, /key functions and /user functions + # this will never be allowed to call /chat/completions + token_team = getattr(valid_token, "team_id", None) + if token_team is not None: + if token_team == "litellm-dashboard": + # this token is only used for managing the ui + allowed_routes = [ + "/sso", + "/login", + "/key", + "/spend", + "/user", + ] + # check if the current route startswith any of the allowed routes + if any( + route.startswith(allowed_route) for allowed_route in allowed_routes + ): + # Do something if the current route starts with any of the allowed routes + pass + else: + raise Exception( + f"This key is made for LiteLLM UI, Tried to access route: {route}. Not allowed" + ) return UserAPIKeyAuth(api_key=api_key, **valid_token_dict) else: raise Exception(f"Invalid Key Passed to LiteLLM Proxy") From 2b588a8786f7ab22938bdedbb231b6d5438108e0 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Mon, 5 Feb 2024 16:11:33 -0800 Subject: [PATCH 11/33] (test) litellm-dashboard never allowed to /chat/completions --- litellm/tests/test_key_generate_prisma.py | 33 +++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/litellm/tests/test_key_generate_prisma.py b/litellm/tests/test_key_generate_prisma.py index 734a0b114..de2616859 100644 --- a/litellm/tests/test_key_generate_prisma.py +++ b/litellm/tests/test_key_generate_prisma.py @@ -44,6 +44,7 @@ from litellm.proxy.proxy_server import ( info_key_fn, update_key_fn, generate_key_fn, + generate_key_helper_fn, spend_user_fn, spend_key_fn, view_spend_logs, @@ -1378,3 +1379,35 @@ async def test_user_api_key_auth_without_master_key(prisma_client): except Exception as e: print("Got Exception", e) pytest.fail(f"Got exception {e}") + + +@pytest.mark.asyncio +async def test_key_with_no_permissions(prisma_client): + """ + - create key + - get key info + - assert key_name is null + """ + setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm.proxy.proxy_server, "general_settings", {"allow_user_auth": False}) + await litellm.proxy.proxy_server.prisma_client.connect() + try: + response = await generate_key_helper_fn( + **{"duration": "1hr", "key_max_budget": 0, "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": "ishaan", "team_id": "litellm-dashboard"} # type: ignore + ) + + print(response) + key = response["token"] + + # make a /chat/completions call -> it should fail + request = Request(scope={"type": "http"}) + request._url = URL(url="/chat/completions") + + # use generated key to auth in + result = await user_api_key_auth(request=request, api_key="Bearer " + key) + print("result from user auth with new key", result) + pytest.fail(f"This should have failed!. IT's an invalid key") + except Exception as e: + print("Got Exception", e) + print(e.message) From a1bbb16ab2afddcf68f9c793a5962fe374ba3165 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 5 Feb 2024 16:16:15 -0800 Subject: [PATCH 12/33] fix(langfuse.py): support logging failed llm api calls to langfuse --- litellm/integrations/langfuse.py | 198 +++++++++++++++++++------------ litellm/utils.py | 58 ++++----- 2 files changed, 151 insertions(+), 105 deletions(-) diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py index e62dccdc4..82de33366 100644 --- a/litellm/integrations/langfuse.py +++ b/litellm/integrations/langfuse.py @@ -55,8 +55,21 @@ class LangFuseLogger: else: self.upstream_langfuse = None + # def log_error(kwargs, response_obj, start_time, end_time): + # generation = trace.generation( + # level ="ERROR" # can be any of DEBUG, DEFAULT, WARNING or ERROR + # status_message='error' # can be any string (e.g. stringified stack trace or error body) + # ) def log_event( - self, kwargs, response_obj, start_time, end_time, user_id, print_verbose + self, + kwargs, + response_obj, + start_time, + end_time, + user_id, + print_verbose, + level="DEFAULT", + status_message=None, ): # Method definition @@ -84,37 +97,49 @@ class LangFuseLogger: pass # end of processing langfuse ######################## - if kwargs.get("call_type", None) == "embedding" or isinstance( - response_obj, litellm.EmbeddingResponse + if ( + level == "ERROR" + and status_message is not None + and isinstance(status_message, str) + ): + input = prompt + output = status_message + elif response_obj is not None and ( + kwargs.get("call_type", None) == "embedding" + or isinstance(response_obj, litellm.EmbeddingResponse) ): input = prompt output = response_obj["data"] - else: + elif response_obj is not None: input = prompt output = response_obj["choices"][0]["message"].json() - print_verbose(f"OUTPUT IN LANGFUSE: {output}; original: {response_obj}") - self._log_langfuse_v2( - user_id, - metadata, - output, - start_time, - end_time, - kwargs, - optional_params, - input, - response_obj, - print_verbose, - ) if self._is_langfuse_v2() else self._log_langfuse_v1( - user_id, - metadata, - output, - start_time, - end_time, - kwargs, - optional_params, - input, - response_obj, - ) + print(f"OUTPUT IN LANGFUSE: {output}; original: {response_obj}") + if self._is_langfuse_v2(): + self._log_langfuse_v2( + user_id, + metadata, + output, + start_time, + end_time, + kwargs, + optional_params, + input, + response_obj, + level, + print_verbose, + ) + elif response_obj is not None: + self._log_langfuse_v1( + user_id, + metadata, + output, + start_time, + end_time, + kwargs, + optional_params, + input, + response_obj, + ) self.Langfuse.flush() print_verbose( @@ -123,15 +148,15 @@ class LangFuseLogger: verbose_logger.info(f"Langfuse Layer Logging - logging success") except: traceback.print_exc() - print_verbose(f"Langfuse Layer Error - {traceback.format_exc()}") + print(f"Langfuse Layer Error - {traceback.format_exc()}") pass async def _async_log_event( self, kwargs, response_obj, start_time, end_time, user_id, print_verbose ): - self.log_event( - kwargs, response_obj, start_time, end_time, user_id, print_verbose - ) + """ + TODO: support async calls when langfuse is truly async + """ def _is_langfuse_v2(self): import langfuse @@ -193,57 +218,78 @@ class LangFuseLogger: optional_params, input, response_obj, + level, print_verbose, ): import langfuse - tags = [] - supports_tags = Version(langfuse.version.__version__) >= Version("2.6.3") - supports_costs = Version(langfuse.version.__version__) >= Version("2.7.3") + try: + tags = [] + supports_tags = Version(langfuse.version.__version__) >= Version("2.6.3") + supports_costs = Version(langfuse.version.__version__) >= Version("2.7.3") - print_verbose(f"Langfuse Layer Logging - logging to langfuse v2 ") + print_verbose(f"Langfuse Layer Logging - logging to langfuse v2 ") - generation_name = metadata.get("generation_name", None) - if generation_name is None: - # just log `litellm-{call_type}` as the generation name - generation_name = f"litellm-{kwargs.get('call_type', 'completion')}" + generation_name = metadata.get("generation_name", None) + if generation_name is None: + # just log `litellm-{call_type}` as the generation name + generation_name = f"litellm-{kwargs.get('call_type', 'completion')}" - trace_params = { - "name": generation_name, - "input": input, - "output": output, - "user_id": metadata.get("trace_user_id", user_id), - "id": metadata.get("trace_id", None), - "session_id": metadata.get("session_id", None), - } - cost = kwargs["response_cost"] - print_verbose(f"trace: {cost}") - if supports_tags: - for key, value in metadata.items(): - tags.append(f"{key}:{value}") - if "cache_hit" in kwargs: - tags.append(f"cache_hit:{kwargs['cache_hit']}") - trace_params.update({"tags": tags}) + trace_params = { + "name": generation_name, + "input": input, + "user_id": metadata.get("trace_user_id", user_id), + "id": metadata.get("trace_id", None), + "session_id": metadata.get("session_id", None), + } - trace = self.Langfuse.trace(**trace_params) + if level == "ERROR": + trace_params["status_message"] = output + else: + trace_params["output"] = output - # get generation_id - generation_id = None - if response_obj.get("id", None) is not None: - generation_id = litellm.utils.get_logging_id(start_time, response_obj) - trace.generation( - name=generation_name, - id=metadata.get("generation_id", generation_id), - startTime=start_time, - endTime=end_time, - model=kwargs["model"], - modelParameters=optional_params, - input=input, - output=output, - usage={ - "prompt_tokens": response_obj["usage"]["prompt_tokens"], - "completion_tokens": response_obj["usage"]["completion_tokens"], - "total_cost": cost if supports_costs else None, - }, - metadata=metadata, - ) + cost = kwargs.get("response_cost", None) + print_verbose(f"trace: {cost}") + if supports_tags: + for key, value in metadata.items(): + tags.append(f"{key}:{value}") + if "cache_hit" in kwargs: + tags.append(f"cache_hit:{kwargs['cache_hit']}") + trace_params.update({"tags": tags}) + + trace = self.Langfuse.trace(**trace_params) + + if level == "ERROR": + trace.generation( + level="ERROR", # can be any of DEBUG, DEFAULT, WARNING or ERROR + status_message=output, # can be any string (e.g. stringified stack trace or error body) + ) + print(f"SUCCESSFULLY LOGGED ERROR") + else: + # get generation_id + generation_id = None + if ( + response_obj is not None + and response_obj.get("id", None) is not None + ): + generation_id = litellm.utils.get_logging_id( + start_time, response_obj + ) + trace.generation( + name=generation_name, + id=metadata.get("generation_id", generation_id), + startTime=start_time, + endTime=end_time, + model=kwargs["model"], + modelParameters=optional_params, + input=input, + output=output, + usage={ + "prompt_tokens": response_obj["usage"]["prompt_tokens"], + "completion_tokens": response_obj["usage"]["completion_tokens"], + "total_cost": cost if supports_costs else None, + }, + metadata=metadata, + ) + except Exception as e: + print(f"Langfuse Layer Error - {traceback.format_exc()}") diff --git a/litellm/utils.py b/litellm/utils.py index e56ba879f..1e83a319f 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1636,34 +1636,6 @@ class Logging: end_time=end_time, print_verbose=print_verbose, ) - if callback == "langfuse": - global langFuseLogger - print_verbose("reaches Async langfuse for logging!") - kwargs = {} - for k, v in self.model_call_details.items(): - if ( - k != "original_response" - ): # copy.deepcopy raises errors as this could be a coroutine - kwargs[k] = v - # this only logs streaming once, complete_streaming_response exists i.e when stream ends - if self.stream: - if "complete_streaming_response" not in kwargs: - return - else: - print_verbose( - "reaches Async langfuse for streaming logging!" - ) - result = kwargs["complete_streaming_response"] - if langFuseLogger is None: - langFuseLogger = LangFuseLogger() - await langFuseLogger._async_log_event( - kwargs=kwargs, - response_obj=result, - start_time=start_time, - end_time=end_time, - user_id=kwargs.get("user", None), - print_verbose=print_verbose, - ) except: print_verbose( f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}" @@ -1788,9 +1760,37 @@ class Logging: response_obj=result, kwargs=self.model_call_details, ) + elif callback == "langfuse": + global langFuseLogger + verbose_logger.debug("reaches langfuse for logging!") + kwargs = {} + for k, v in self.model_call_details.items(): + if ( + k != "original_response" + ): # copy.deepcopy raises errors as this could be a coroutine + kwargs[k] = v + # this only logs streaming once, complete_streaming_response exists i.e when stream ends + if langFuseLogger is None or ( + self.langfuse_public_key != langFuseLogger.public_key + and self.langfuse_secret != langFuseLogger.secret_key + ): + langFuseLogger = LangFuseLogger( + langfuse_public_key=self.langfuse_public_key, + langfuse_secret=self.langfuse_secret, + ) + langFuseLogger.log_event( + start_time=start_time, + end_time=end_time, + response_obj=None, + user_id=kwargs.get("user", None), + print_verbose=print_verbose, + status_message=str(exception), + level="ERROR", + kwargs=self.model_call_details, + ) except Exception as e: print_verbose( - f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {traceback.format_exc()}" + f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {str(e)}" ) print_verbose( f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}" From 1f7c8e86a7361e7853e28dc0022069d40581f867 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Mon, 5 Feb 2024 16:21:54 -0800 Subject: [PATCH 13/33] (fix) make sure route is str --- litellm/proxy/proxy_server.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 781c676c8..289a36cb2 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -652,8 +652,13 @@ async def user_api_key_auth( "/user", ] # check if the current route startswith any of the allowed routes - if any( - route.startswith(allowed_route) for allowed_route in allowed_routes + if ( + route is not None + and isinstance(route, str) + and any( + route.startswith(allowed_route) + for allowed_route in allowed_routes + ) ): # Do something if the current route starts with any of the allowed routes pass From 3b9ada07e024d991399a2f089a47f84a9ecbe617 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 5 Feb 2024 16:26:17 -0800 Subject: [PATCH 14/33] fix(main.py): raise better error message for health check models without mode --- litellm/main.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/litellm/main.py b/litellm/main.py index 66af60576..384dadc32 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -3320,6 +3320,10 @@ async def ahealth_check( response = {} # args like remaining ratelimit etc. return response except Exception as e: + if model not in litellm.model_cost and mode is None: + raise Exception( + "Missing `mode`. Set the `mode` for the model - https://docs.litellm.ai/docs/proxy/health#embedding-models" + ) return {"error": str(e)} From cdbbedec362ddf73fe7714ab29ceef1498e73421 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 5 Feb 2024 16:26:30 -0800 Subject: [PATCH 15/33] =?UTF-8?q?bump:=20version=201.22.5=20=E2=86=92=201.?= =?UTF-8?q?22.6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 381ae931e..06dedbed6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.22.5" +version = "1.22.6" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -69,7 +69,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.22.5" +version = "1.22.6" version_files = [ "pyproject.toml:^version" ] From b9e6f760ebd00175e90ec3087444e16f491ac27e Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Mon, 5 Feb 2024 17:07:57 -0800 Subject: [PATCH 16/33] Update model_prices_and_context_window.json --- model_prices_and_context_window.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index b6ded001c..4c28bdbe8 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -156,8 +156,8 @@ "max_tokens": 4097, "max_input_tokens": 4097, "max_output_tokens": 4096, - "input_cost_per_token": 0.000012, - "output_cost_per_token": 0.000016, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000006, "litellm_provider": "openai", "mode": "chat" }, From f4cdb6d234493028a2e3b1de3b3c5008e48d4b50 Mon Sep 17 00:00:00 2001 From: John HU Date: Mon, 5 Feb 2024 17:30:39 -0800 Subject: [PATCH 17/33] Fix admin UI title and description --- ui/litellm-dashboard/src/app/layout.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ui/litellm-dashboard/src/app/layout.tsx b/ui/litellm-dashboard/src/app/layout.tsx index 3314e4780..a04a0d66e 100644 --- a/ui/litellm-dashboard/src/app/layout.tsx +++ b/ui/litellm-dashboard/src/app/layout.tsx @@ -5,8 +5,8 @@ import "./globals.css"; const inter = Inter({ subsets: ["latin"] }); export const metadata: Metadata = { - title: "Create Next App", - description: "Generated by create next app", + title: "🚅 LiteLLM", + description: "LiteLLM Proxy Admin UI", }; export default function RootLayout({ From 7a0bccf4d06cc70141dd20973feb11dfc4da3352 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Mon, 5 Feb 2024 21:44:27 -0800 Subject: [PATCH 18/33] test(test_key_generate_dynamodb.py): fix test --- litellm/tests/test_key_generate_dynamodb.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/litellm/tests/test_key_generate_dynamodb.py b/litellm/tests/test_key_generate_dynamodb.py index 61d0ff6a6..e77dc7472 100644 --- a/litellm/tests/test_key_generate_dynamodb.py +++ b/litellm/tests/test_key_generate_dynamodb.py @@ -490,8 +490,13 @@ def test_dynamo_db_migration(custom_db_client): try: async def test(): + request = GenerateKeyRequest(max_budget=1) + key = await generate_key_fn(request) + print(key) + + generated_key = key.key bearer_token = ( - "Bearer " + "sk-elJDL2pOEjcAuC7zD4psAg" + "Bearer " + generated_key ) # this works with ishaan's db, it's a never expiring key request = Request(scope={"type": "http"}) From d4fd287617e04fd807717ee9646dc01042ccf825 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Mon, 5 Feb 2024 22:37:05 -0800 Subject: [PATCH 19/33] (docs) upperbound_key_generate_params --- docs/my-website/docs/proxy/virtual_keys.md | 16 ++++++++++++++++ .../model_prices_and_context_window_backup.json | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/docs/my-website/docs/proxy/virtual_keys.md b/docs/my-website/docs/proxy/virtual_keys.md index dd5edc6da..c51bfc0ac 100644 --- a/docs/my-website/docs/proxy/virtual_keys.md +++ b/docs/my-website/docs/proxy/virtual_keys.md @@ -352,6 +352,22 @@ Request Params: } ``` +## Upperbound /key/generate params +Use this, if you need to control the upperbound that users can use for `max_budget`, `budget_duration` or any `key/generate` param per key. + +Set `litellm_settings:upperbound_key_generate_params`: +```yaml +litellm_settings: + upperbound_key_generate_params: + max_budget: 100 # upperbound of $100, for all /key/generate requests + duration: "30d" # upperbound of 30 days for all /key/generate requests +``` + +** Expected Behavior ** + +- Send a `/key/generate` request with `max_budget=200` +- Key will be created with `max_budget=100` since 100 is the upper bound + ## Default /key/generate params Use this, if you need to control the default `max_budget` or any `key/generate` param per key. diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index b6ded001c..4c28bdbe8 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -156,8 +156,8 @@ "max_tokens": 4097, "max_input_tokens": 4097, "max_output_tokens": 4096, - "input_cost_per_token": 0.000012, - "output_cost_per_token": 0.000016, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000006, "litellm_provider": "openai", "mode": "chat" }, From a712596d4628294b71a95d49a527f765dbe8d4e1 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Mon, 5 Feb 2024 22:38:47 -0800 Subject: [PATCH 20/33] (feat) upperbound_key_generate_params --- litellm/__init__.py | 1 + litellm/proxy/proxy_server.py | 69 +++++++++++++++++++++++++---------- 2 files changed, 51 insertions(+), 19 deletions(-) diff --git a/litellm/__init__.py b/litellm/__init__.py index 3f2a1e4b4..26b761c64 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -146,6 +146,7 @@ suppress_debug_info = False dynamodb_table_name: Optional[str] = None s3_callback_params: Optional[Dict] = None default_key_generate_params: Optional[Dict] = None +upperbound_key_generate_params: Optional[Dict] = None default_team_settings: Optional[List] = None #### RELIABILITY #### request_timeout: Optional[float] = 6000 diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 289a36cb2..494c87414 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1391,6 +1391,26 @@ class ProxyConfig: proxy_config = ProxyConfig() +def _duration_in_seconds(duration: str): + match = re.match(r"(\d+)([smhd]?)", duration) + if not match: + raise ValueError("Invalid duration format") + + value, unit = match.groups() + value = int(value) + + if unit == "s": + return value + elif unit == "m": + return value * 60 + elif unit == "h": + return value * 3600 + elif unit == "d": + return value * 86400 + else: + raise ValueError("Unsupported duration unit") + + async def generate_key_helper_fn( duration: Optional[str], models: list, @@ -1425,25 +1445,6 @@ async def generate_key_helper_fn( if token is None: token = f"sk-{secrets.token_urlsafe(16)}" - def _duration_in_seconds(duration: str): - match = re.match(r"(\d+)([smhd]?)", duration) - if not match: - raise ValueError("Invalid duration format") - - value, unit = match.groups() - value = int(value) - - if unit == "s": - return value - elif unit == "m": - return value * 60 - elif unit == "h": - return value * 3600 - elif unit == "d": - return value * 86400 - else: - raise ValueError("Unsupported duration unit") - if duration is None: # allow tokens that never expire expires = None else: @@ -2660,6 +2661,36 @@ async def generate_key_fn( elif key == "metadata" and value == {}: setattr(data, key, litellm.default_key_generate_params.get(key, {})) + # check if user set default key/generate params on config.yaml + if litellm.upperbound_key_generate_params is not None: + for elem in data: + # if key in litellm.upperbound_key_generate_params, use the min of value and litellm.upperbound_key_generate_params[key] + key, value = elem + if value is not None and key in litellm.upperbound_key_generate_params: + # if value is float/int + if key in [ + "max_budget", + "max_parallel_requests", + "tpm_limit", + "rpm_limit", + ]: + if value > litellm.upperbound_key_generate_params[key]: + # directly compare floats/ints + setattr( + data, key, litellm.upperbound_key_generate_params[key] + ) + elif key == "budget_duration": + # budgets are in 1s, 1m, 1h, 1d, 1m (30s, 30m, 30h, 30d, 30m) + # compare the duration in seconds and max duration in seconds + upperbound_budget_duration = _duration_in_seconds( + duration=litellm.upperbound_key_generate_params[key] + ) + user_set_budget_duration = _duration_in_seconds(duration=value) + if user_set_budget_duration > upperbound_budget_duration: + setattr( + data, key, litellm.upperbound_key_generate_params[key] + ) + data_json = data.json() # type: ignore # if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users From 4d4554b0e4c2996db037854c4da48ec23ba5786e Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Mon, 5 Feb 2024 22:39:36 -0800 Subject: [PATCH 21/33] (test) test_upperbound_key_params --- litellm/tests/test_key_generate_prisma.py | 34 +++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/litellm/tests/test_key_generate_prisma.py b/litellm/tests/test_key_generate_prisma.py index de2616859..b4c86afb2 100644 --- a/litellm/tests/test_key_generate_prisma.py +++ b/litellm/tests/test_key_generate_prisma.py @@ -1279,6 +1279,40 @@ async def test_default_key_params(prisma_client): pytest.fail(f"Got exception {e}") +@pytest.mark.asyncio() +async def test_upperbound_key_params(prisma_client): + """ + - create key + - get key info + - assert key_name is not null + """ + setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + litellm.upperbound_key_generate_params = { + "max_budget": 0.001, + "budget_duration": "1m", + } + await litellm.proxy.proxy_server.prisma_client.connect() + try: + request = GenerateKeyRequest( + max_budget=200000, + budget_duration="30d", + ) + key = await generate_key_fn(request) + generated_key = key.key + + result = await info_key_fn(key=generated_key) + key_info = result["info"] + # assert it used the upper bound for max_budget, and budget_duration + assert key_info["max_budget"] == 0.001 + assert key_info["budget_duration"] == "1m" + + print(result) + except Exception as e: + print("Got Exception", e) + pytest.fail(f"Got exception {e}") + + def test_get_bearer_token(): from litellm.proxy.proxy_server import _get_bearer_token From 71814d8149779cc5a5dc6777a35c5e5bca29fcee Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Mon, 5 Feb 2024 22:40:52 -0800 Subject: [PATCH 22/33] (feat) proxy - upperbound params /key/generate --- litellm/proxy/proxy_config.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 874049a75..bd844bd7b 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -73,6 +73,9 @@ litellm_settings: max_budget: 1.5000 models: ["azure-gpt-3.5"] duration: None + upperbound_key_generate_params: + max_budget: 100 + duration: "30d" # cache: True # setting callback class # callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance] From 4d625818d699b0b1710e3c3434bfa9c74a80eb12 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Mon, 5 Feb 2024 22:51:08 -0800 Subject: [PATCH 23/33] (fix) proxy startup test --- .../test_configs/test_config_no_auth.yaml | 95 ------------------- 1 file changed, 95 deletions(-) diff --git a/litellm/tests/test_configs/test_config_no_auth.yaml b/litellm/tests/test_configs/test_config_no_auth.yaml index 8441018e3..ccebe016d 100644 --- a/litellm/tests/test_configs/test_config_no_auth.yaml +++ b/litellm/tests/test_configs/test_config_no_auth.yaml @@ -9,21 +9,11 @@ model_list: api_key: os.environ/AZURE_CANADA_API_KEY model: azure/gpt-35-turbo model_name: azure-model -- litellm_params: - api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1 - api_key: os.environ/AZURE_API_KEY - model: azure/chatgpt-v-2 - model_name: azure-cloudflare-model - litellm_params: api_base: https://openai-france-1234.openai.azure.com api_key: os.environ/AZURE_FRANCE_API_KEY model: azure/gpt-turbo model_name: azure-model -- litellm_params: - model: gpt-3.5-turbo - model_info: - description: this is a test openai model - model_name: test_openai_models - litellm_params: model: gpt-3.5-turbo model_info: @@ -36,93 +26,8 @@ model_list: description: this is a test openai model id: 4d1ee26c-abca-450c-8744-8e87fd6755e9 model_name: test_openai_models -- litellm_params: - model: gpt-3.5-turbo - model_info: - description: this is a test openai model - id: 00e19c0f-b63d-42bb-88e9-016fb0c60764 - model_name: test_openai_models -- litellm_params: - model: gpt-3.5-turbo - model_info: - description: this is a test openai model - id: 79fc75bf-8e1b-47d5-8d24-9365a854af03 - model_name: test_openai_models -- litellm_params: - api_base: os.environ/AZURE_API_BASE - api_key: os.environ/AZURE_API_KEY - api_version: 2023-07-01-preview - model: azure/azure-embedding-model - model_info: - mode: embedding - model_name: azure-embedding-model -- litellm_params: - model: gpt-3.5-turbo - model_info: - description: this is a test openai model - id: 55848c55-4162-40f9-a6e2-9a722b9ef404 - model_name: test_openai_models -- litellm_params: - model: gpt-3.5-turbo - model_info: - description: this is a test openai model - id: 34339b1e-e030-4bcc-a531-c48559f10ce4 - model_name: test_openai_models -- litellm_params: - model: gpt-3.5-turbo - model_info: - description: this is a test openai model - id: f6f74e14-ac64-4403-9365-319e584dcdc5 - model_name: test_openai_models -- litellm_params: - model: gpt-3.5-turbo - model_info: - description: this is a test openai model - id: 9b1ef341-322c-410a-8992-903987fef439 - model_name: test_openai_models - litellm_params: model: bedrock/amazon.titan-embed-text-v1 model_info: mode: embedding model_name: amazon-embeddings -- litellm_params: - model: sagemaker/berri-benchmarking-gpt-j-6b-fp16 - model_info: - mode: embedding - model_name: GPT-J 6B - Sagemaker Text Embedding (Internal) -- litellm_params: - model: dall-e-3 - model_info: - mode: image_generation - model_name: dall-e-3 -- litellm_params: - api_base: os.environ/AZURE_SWEDEN_API_BASE - api_key: os.environ/AZURE_SWEDEN_API_KEY - api_version: 2023-12-01-preview - model: azure/dall-e-3-test - model_info: - mode: image_generation - model_name: dall-e-3 -- litellm_params: - api_base: os.environ/AZURE_API_BASE - api_key: os.environ/AZURE_API_KEY - api_version: 2023-06-01-preview - model: azure/ - model_info: - mode: image_generation - model_name: dall-e-2 -- litellm_params: - api_base: os.environ/AZURE_API_BASE - api_key: os.environ/AZURE_API_KEY - api_version: 2023-07-01-preview - model: azure/azure-embedding-model - model_info: - base_model: text-embedding-ada-002 - mode: embedding - model_name: text-embedding-ada-002 -- litellm_params: - model: gpt-3.5-turbo - model_info: - description: this is a test openai model - id: 34cb2419-7c63-44ae-a189-53f1d1ce5953 - model_name: test_openai_models From 9a8abdb1ae3c7e4733a537ec9bc182bd213decce Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Mon, 5 Feb 2024 22:53:31 -0800 Subject: [PATCH 24/33] (ci/cd) print debug info for test_proxy_gunicorn_startup_config_dict --- litellm/tests/test_proxy_startup.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/litellm/tests/test_proxy_startup.py b/litellm/tests/test_proxy_startup.py index 650e2f8a7..a846c9f4a 100644 --- a/litellm/tests/test_proxy_startup.py +++ b/litellm/tests/test_proxy_startup.py @@ -33,6 +33,11 @@ def test_proxy_gunicorn_startup_direct_config(): Test both approaches """ try: + from litellm._logging import verbose_proxy_logger, verbose_router_logger + import logging + + verbose_proxy_logger.setLevel(level=logging.DEBUG) + verbose_router_logger.setLevel(level=logging.DEBUG) filepath = os.path.dirname(os.path.abspath(__file__)) # test with worker_config = config yaml config_fp = f"{filepath}/test_configs/test_config_no_auth.yaml" @@ -48,6 +53,11 @@ def test_proxy_gunicorn_startup_direct_config(): def test_proxy_gunicorn_startup_config_dict(): try: + from litellm._logging import verbose_proxy_logger, verbose_router_logger + import logging + + verbose_proxy_logger.setLevel(level=logging.DEBUG) + verbose_router_logger.setLevel(level=logging.DEBUG) filepath = os.path.dirname(os.path.abspath(__file__)) # test with worker_config = config yaml config_fp = f"{filepath}/test_configs/test_config_no_auth.yaml" From 47bed68c7f6a634dd7cfbb5910659dd18ee03c1a Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 6 Feb 2024 06:46:49 -0800 Subject: [PATCH 25/33] (fix) test_normal_router_tpm_limit --- litellm/tests/test_parallel_request_limiter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/litellm/tests/test_parallel_request_limiter.py b/litellm/tests/test_parallel_request_limiter.py index 34dc0e3b5..528bb19d2 100644 --- a/litellm/tests/test_parallel_request_limiter.py +++ b/litellm/tests/test_parallel_request_limiter.py @@ -379,6 +379,7 @@ async def test_normal_router_tpm_limit(): ) except Exception as e: + print("Exception on test_normal_router_tpm_limit", e) assert e.status_code == 429 From 2e3748e6eb0e7e06fcdf58feb344b0b0dfcd45ee Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 6 Feb 2024 07:22:54 -0800 Subject: [PATCH 26/33] fix(ollama_chat.py): fix ollama chat completion token counting --- litellm/llms/ollama_chat.py | 8 ++++++-- litellm/utils.py | 3 --- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py index 95ff8dfaa..3628ae290 100644 --- a/litellm/llms/ollama_chat.py +++ b/litellm/llms/ollama_chat.py @@ -320,11 +320,15 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj): model_response["choices"][0]["message"] = message else: model_response["choices"][0]["message"] = response_json["message"] + model_response["created"] = int(time.time()) - model_response["model"] = "ollama/" + data["model"] + model_response["model"] = "ollama_chat/" + data["model"] prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=data["messages"])) # type: ignore completion_tokens = response_json.get( - "eval_count", litellm.token_counter(text=response_json["message"]) + "eval_count", + litellm.token_counter( + text=response_json["message"]["content"], count_response_tokens=True + ), ) model_response["usage"] = litellm.Usage( prompt_tokens=prompt_tokens, diff --git a/litellm/utils.py b/litellm/utils.py index 1e83a319f..8491a1d5e 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -983,9 +983,6 @@ class Logging: verbose_logger.debug( f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n" ) - verbose_logger.debug( - f"Logging Details Post-API Call: LiteLLM Params: {self.model_call_details}" - ) if self.logger_fn and callable(self.logger_fn): try: self.logger_fn( From c2a523b9548c85ee389d673e8ea1ce7f98205286 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 6 Feb 2024 07:26:13 -0800 Subject: [PATCH 27/33] fix(utils.py): use print_verbose for statements, so debug can be seen when running sdk --- litellm/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/utils.py b/litellm/utils.py index 8491a1d5e..5ccb85ef0 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -980,7 +980,7 @@ class Logging: self.model_call_details["log_event_type"] = "post_api_call" # User Logging -> if you pass in a custom logging function - verbose_logger.debug( + print_verbose( f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n" ) if self.logger_fn and callable(self.logger_fn): From 4a5b635e3597c0c4b2c4ab68ef8b809401c41f6d Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 6 Feb 2024 07:35:46 -0800 Subject: [PATCH 28/33] build(requirements.txt): update the proxy requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c9bd0e511..768e8dff3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,7 +11,7 @@ boto3==1.28.58 # aws bedrock/sagemaker calls redis==4.6.0 # caching prisma==0.11.0 # for db mangum==0.17.0 # for aws lambda functions -google-generativeai==0.1.0 # for vertex ai calls +google-generativeai==0.3.2 # for vertex ai calls async_generator==1.10.0 # for async ollama calls traceloop-sdk==0.5.3 # for open telemetry logging langfuse>=2.6.3 # for langfuse self-hosted logging From 9e091a0624cee976ee0995bfc3d21e6004d14cbd Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 6 Feb 2024 07:43:47 -0800 Subject: [PATCH 29/33] fix(ollama_chat.py): explicitly state if ollama call is streaming or not --- litellm/llms/ollama_chat.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py index 3628ae290..d1a439398 100644 --- a/litellm/llms/ollama_chat.py +++ b/litellm/llms/ollama_chat.py @@ -146,7 +146,12 @@ def get_ollama_response( optional_params[k] = v stream = optional_params.pop("stream", False) - data = {"model": model, "messages": messages, "options": optional_params} + data = { + "model": model, + "messages": messages, + "options": optional_params, + "stream": stream, + } ## LOGGING logging_obj.pre_call( input=None, From f2ef32bceed1c24da0e140ebc5e001838dab3584 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 6 Feb 2024 07:44:04 -0800 Subject: [PATCH 30/33] =?UTF-8?q?bump:=20version=201.22.6=20=E2=86=92=201.?= =?UTF-8?q?22.7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 06dedbed6..be8c8966b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.22.6" +version = "1.22.7" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -69,7 +69,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.22.6" +version = "1.22.7" version_files = [ "pyproject.toml:^version" ] From d1db67890c7f70b4bd94269e4ffd59d715052783 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 6 Feb 2024 10:11:43 -0800 Subject: [PATCH 31/33] fix(ollama.py): support format for ollama --- litellm/llms/ollama.py | 10 +++++++++- litellm/llms/ollama_chat.py | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/litellm/llms/ollama.py b/litellm/llms/ollama.py index d0bc24af4..9339deb78 100644 --- a/litellm/llms/ollama.py +++ b/litellm/llms/ollama.py @@ -146,7 +146,15 @@ def get_ollama_response( optional_params[k] = v stream = optional_params.pop("stream", False) - data = {"model": model, "prompt": prompt, "options": optional_params} + format = optional_params.pop("format", None) + data = { + "model": model, + "prompt": prompt, + "options": optional_params, + "stream": stream, + } + if format is not None: + data["format"] = format ## LOGGING logging_obj.pre_call( diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py index d1a439398..0311931b1 100644 --- a/litellm/llms/ollama_chat.py +++ b/litellm/llms/ollama_chat.py @@ -146,12 +146,15 @@ def get_ollama_response( optional_params[k] = v stream = optional_params.pop("stream", False) + format = optional_params.pop("format", None) data = { "model": model, "messages": messages, "options": optional_params, "stream": stream, } + if format is not None: + data["format"] = format ## LOGGING logging_obj.pre_call( input=None, From e542aa10ccce06e313cdb5743301603173eaa97e Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 6 Feb 2024 10:12:13 -0800 Subject: [PATCH 32/33] =?UTF-8?q?bump:=20version=201.22.7=20=E2=86=92=201.?= =?UTF-8?q?22.8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index be8c8966b..17d80ae8e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.22.7" +version = "1.22.8" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -69,7 +69,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.22.7" +version = "1.22.8" version_files = [ "pyproject.toml:^version" ] From 405a44727c5c0b09ff095596c6cca2d51601ba98 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 6 Feb 2024 10:57:20 -0800 Subject: [PATCH 33/33] (ci/cd) run in verbose mode --- .circleci/config.yml | 2 +- litellm/tests/test_completion.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c1224159a..9a29ed07c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -80,7 +80,7 @@ jobs: command: | pwd ls - python -m pytest -vv litellm/tests/ -x --junitxml=test-results/junit.xml --durations=5 + python -m pytest -vv -s litellm/tests/ -x --junitxml=test-results/junit.xml --durations=5 no_output_timeout: 120m # Store test results diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index bd0301f20..e0ee05d4f 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -41,7 +41,7 @@ def test_completion_custom_provider_model_name(): messages=messages, logger_fn=logger_fn, ) - # Add any assertions here to check the, response + # Add any assertions here to check the,response print(response) print(response["choices"][0]["finish_reason"]) except litellm.Timeout as e: