From e78c038284178e2769d2b1a606adf694ffcf3f0a Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 26 Jun 2024 08:46:45 -0700 Subject: [PATCH 1/8] add gemini-1.0-ultra-001 --- ...odel_prices_and_context_window_backup.json | 54 +++++++++++++++++++ model_prices_and_context_window.json | 30 +++++++++++ 2 files changed, 84 insertions(+) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 415d220f2..2c72248f0 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -1232,6 +1232,36 @@ "supports_function_calling": true, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "gemini-1.0-ultra": { + "max_tokens": 8192, + "max_input_tokens": 32760, + "max_output_tokens": 8192, + "input_cost_per_image": 0.0025, + "input_cost_per_video_per_second": 0.002, + "input_cost_per_token": 0.0000005, + "input_cost_per_character": 0.000000125, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 0.000000375, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro information here" + }, + "gemini-1.0-ultra-001": { + "max_tokens": 8192, + "max_input_tokens": 32760, + "max_output_tokens": 8192, + "input_cost_per_image": 0.0025, + "input_cost_per_video_per_second": 0.002, + "input_cost_per_token": 0.0000005, + "input_cost_per_character": 0.000000125, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 0.000000375, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro information here" + }, "gemini-1.0-pro-002": { "max_tokens": 8192, "max_input_tokens": 32760, @@ -2073,6 +2103,30 @@ "supports_function_calling": true, "supports_vision": true }, + "openrouter/anthropic/claude-3-haiku-20240307": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.00000125, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 264 + }, + "openrouter/anthropic/claude-3.5-sonnet": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, "openrouter/anthropic/claude-3-sonnet": { "max_tokens": 200000, "input_cost_per_token": 0.000003, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index d7a7a7dc8..2c72248f0 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -1232,6 +1232,36 @@ "supports_function_calling": true, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "gemini-1.0-ultra": { + "max_tokens": 8192, + "max_input_tokens": 32760, + "max_output_tokens": 8192, + "input_cost_per_image": 0.0025, + "input_cost_per_video_per_second": 0.002, + "input_cost_per_token": 0.0000005, + "input_cost_per_character": 0.000000125, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 0.000000375, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro information here" + }, + "gemini-1.0-ultra-001": { + "max_tokens": 8192, + "max_input_tokens": 32760, + "max_output_tokens": 8192, + "input_cost_per_image": 0.0025, + "input_cost_per_video_per_second": 0.002, + "input_cost_per_token": 0.0000005, + "input_cost_per_character": 0.000000125, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 0.000000375, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro information here" + }, "gemini-1.0-pro-002": { "max_tokens": 8192, "max_input_tokens": 32760, From 1ed5ceda8f4f1929ac977dbde1331800c8b71e29 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 26 Jun 2024 08:55:04 -0700 Subject: [PATCH 2/8] fix gemini ultra info --- litellm/model_prices_and_context_window_backup.json | 12 ++++++------ model_prices_and_context_window.json | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 2c72248f0..8d9b2595f 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -1234,8 +1234,8 @@ }, "gemini-1.0-ultra": { "max_tokens": 8192, - "max_input_tokens": 32760, - "max_output_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 2048, "input_cost_per_image": 0.0025, "input_cost_per_video_per_second": 0.002, "input_cost_per_token": 0.0000005, @@ -1245,12 +1245,12 @@ "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, - "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro information here" + "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "gemini-1.0-ultra-001": { "max_tokens": 8192, - "max_input_tokens": 32760, - "max_output_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 2048, "input_cost_per_image": 0.0025, "input_cost_per_video_per_second": 0.002, "input_cost_per_token": 0.0000005, @@ -1260,7 +1260,7 @@ "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, - "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro information here" + "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "gemini-1.0-pro-002": { "max_tokens": 8192, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 2c72248f0..8d9b2595f 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -1234,8 +1234,8 @@ }, "gemini-1.0-ultra": { "max_tokens": 8192, - "max_input_tokens": 32760, - "max_output_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 2048, "input_cost_per_image": 0.0025, "input_cost_per_video_per_second": 0.002, "input_cost_per_token": 0.0000005, @@ -1245,12 +1245,12 @@ "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, - "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro information here" + "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "gemini-1.0-ultra-001": { "max_tokens": 8192, - "max_input_tokens": 32760, - "max_output_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 2048, "input_cost_per_image": 0.0025, "input_cost_per_video_per_second": 0.002, "input_cost_per_token": 0.0000005, @@ -1260,7 +1260,7 @@ "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, - "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro information here" + "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "gemini-1.0-pro-002": { "max_tokens": 8192, From 08412f736b3aeb1adf4c82f0001dda9590dc50f1 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 26 Jun 2024 11:18:22 -0700 Subject: [PATCH 3/8] add vertex text-bison --- ...odel_prices_and_context_window_backup.json | 42 +++++++++++++++++-- model_prices_and_context_window.json | 42 +++++++++++++++++-- 2 files changed, 76 insertions(+), 8 deletions(-) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 8d9b2595f..b708e509b 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -1028,21 +1028,55 @@ "tool_use_system_prompt_tokens": 159 }, "text-bison": { - "max_tokens": 1024, + "max_tokens": 2048, "max_input_tokens": 8192, - "max_output_tokens": 1024, - "input_cost_per_token": 0.000000125, - "output_cost_per_token": 0.000000125, + "max_output_tokens": 2048, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "text-bison@001": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison@002": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison32k": { "max_tokens": 1024, "max_input_tokens": 8192, "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison32k@002": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 8d9b2595f..b708e509b 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -1028,21 +1028,55 @@ "tool_use_system_prompt_tokens": 159 }, "text-bison": { - "max_tokens": 1024, + "max_tokens": 2048, "max_input_tokens": 8192, - "max_output_tokens": 1024, - "input_cost_per_token": 0.000000125, - "output_cost_per_token": 0.000000125, + "max_output_tokens": 2048, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "text-bison@001": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison@002": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison32k": { "max_tokens": 1024, "max_input_tokens": 8192, "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison32k@002": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" From 553f258758b136bb0e05592cd04cc16de50d4cd6 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 26 Jun 2024 11:26:14 -0700 Subject: [PATCH 4/8] add chat-bison-32k@002 --- ...odel_prices_and_context_window_backup.json | 30 +++++++++++++++++++ model_prices_and_context_window.json | 30 +++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index b708e509b..84c3b9de2 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -1107,6 +1107,8 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1117,6 +1119,8 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1127,6 +1131,8 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1137,6 +1143,20 @@ "max_output_tokens": 8192, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "chat-bison-32k@002": { + "max_tokens": 8192, + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1147,6 +1167,8 @@ "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-text-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1157,6 +1179,8 @@ "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1197,6 +1221,8 @@ "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1207,6 +1233,8 @@ "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1217,6 +1245,8 @@ "max_output_tokens": 8192, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index b708e509b..84c3b9de2 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -1107,6 +1107,8 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1117,6 +1119,8 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1127,6 +1131,8 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1137,6 +1143,20 @@ "max_output_tokens": 8192, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "chat-bison-32k@002": { + "max_tokens": 8192, + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1147,6 +1167,8 @@ "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-text-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1157,6 +1179,8 @@ "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1197,6 +1221,8 @@ "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1207,6 +1233,8 @@ "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1217,6 +1245,8 @@ "max_output_tokens": 8192, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" From 08d3d670c1648e38ff512a16cc4936a9184abe14 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 26 Jun 2024 11:28:10 -0700 Subject: [PATCH 5/8] add code-bison --- ...odel_prices_and_context_window_backup.json | 36 +++++++++++++++++++ model_prices_and_context_window.json | 36 +++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 84c3b9de2..f51182d8f 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -1185,6 +1185,42 @@ "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "code-bison@002": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-bison32k": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-bison-32k@002": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, "code-gecko@001": { "max_tokens": 64, "max_input_tokens": 2048, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 84c3b9de2..f51182d8f 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -1185,6 +1185,42 @@ "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "code-bison@002": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-bison32k": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-bison-32k@002": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, "code-gecko@001": { "max_tokens": 64, "max_input_tokens": 2048, From b13bac82049a0d53b8b5a7c24e95455341e1f4dc Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 26 Jun 2024 11:34:48 -0700 Subject: [PATCH 6/8] add code-gecko-latest --- litellm/model_prices_and_context_window_backup.json | 10 ++++++++++ model_prices_and_context_window.json | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index f51182d8f..f7a23e8e1 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -1251,6 +1251,16 @@ "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "code-gecko-latest": { + "max_tokens": 64, + "max_input_tokens": 2048, + "max_output_tokens": 64, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, "codechat-bison": { "max_tokens": 1024, "max_input_tokens": 6144, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index f51182d8f..f7a23e8e1 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -1251,6 +1251,16 @@ "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "code-gecko-latest": { + "max_tokens": 64, + "max_input_tokens": 2048, + "max_output_tokens": 64, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, "codechat-bison": { "max_tokens": 1024, "max_input_tokens": 6144, From 5dacaa2c4c644f574d4d270594756cd06eb8fad0 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 26 Jun 2024 11:37:39 -0700 Subject: [PATCH 7/8] add codechat-bison@latest --- ...odel_prices_and_context_window_backup.json | 36 +++++++++++++++++++ model_prices_and_context_window.json | 36 +++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index f7a23e8e1..e665e79f3 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -1261,6 +1261,18 @@ "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "codechat-bison@latest": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, "codechat-bison": { "max_tokens": 1024, "max_input_tokens": 6144, @@ -1285,6 +1297,18 @@ "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "codechat-bison@002": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, "codechat-bison-32k": { "max_tokens": 8192, "max_input_tokens": 32000, @@ -1297,6 +1321,18 @@ "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "codechat-bison-32k@002": { + "max_tokens": 8192, + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, "gemini-pro": { "max_tokens": 8192, "max_input_tokens": 32760, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index f7a23e8e1..e665e79f3 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -1261,6 +1261,18 @@ "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "codechat-bison@latest": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, "codechat-bison": { "max_tokens": 1024, "max_input_tokens": 6144, @@ -1285,6 +1297,18 @@ "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "codechat-bison@002": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, "codechat-bison-32k": { "max_tokens": 8192, "max_input_tokens": 32000, @@ -1297,6 +1321,18 @@ "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "codechat-bison-32k@002": { + "max_tokens": 8192, + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, "gemini-pro": { "max_tokens": 8192, "max_input_tokens": 32760, From 7264113c2587cb0c5115f9722d9c51fd36f9654a Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Wed, 26 Jun 2024 18:08:54 -0700 Subject: [PATCH 8/8] vertex testing --- .../tests/test_amazing_vertex_completion.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py index c9e5501a8..901d68ef3 100644 --- a/litellm/tests/test_amazing_vertex_completion.py +++ b/litellm/tests/test_amazing_vertex_completion.py @@ -329,11 +329,14 @@ def test_vertex_ai(): "code-gecko@001", "code-gecko@002", "code-gecko@latest", + "codechat-bison@latest", "code-bison@001", "text-bison@001", "gemini-1.5-pro", "gemini-1.5-pro-preview-0215", - ]: + ] or ( + "gecko" in model or "32k" in model or "ultra" in model or "002" in model + ): # our account does not have access to this model continue print("making request", model) @@ -381,12 +384,15 @@ def test_vertex_ai_stream(): "code-gecko@001", "code-gecko@002", "code-gecko@latest", + "codechat-bison@latest", "code-bison@001", "text-bison@001", "gemini-1.5-pro", "gemini-1.5-pro-preview-0215", - ]: - # ouraccount does not have access to this model + ] or ( + "gecko" in model or "32k" in model or "ultra" in model or "002" in model + ): + # our account does not have access to this model continue print("making request", model) response = completion( @@ -433,11 +439,12 @@ async def test_async_vertexai_response(): "code-gecko@001", "code-gecko@002", "code-gecko@latest", + "codechat-bison@latest", "code-bison@001", "text-bison@001", "gemini-1.5-pro", "gemini-1.5-pro-preview-0215", - ]: + ] or ("gecko" in model or "32k" in model or "ultra" in model or "002" in model): # our account does not have access to this model continue try: @@ -479,11 +486,12 @@ async def test_async_vertexai_streaming_response(): "code-gecko@001", "code-gecko@002", "code-gecko@latest", + "codechat-bison@latest", "code-bison@001", "text-bison@001", "gemini-1.5-pro", "gemini-1.5-pro-preview-0215", - ]: + ] or ("gecko" in model or "32k" in model or "ultra" in model or "002" in model): # our account does not have access to this model continue try: