diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 6b15084a9..415041dcb 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -1068,21 +1068,55 @@ "tool_use_system_prompt_tokens": 159 }, "text-bison": { - "max_tokens": 1024, + "max_tokens": 2048, "max_input_tokens": 8192, - "max_output_tokens": 1024, - "input_cost_per_token": 0.000000125, - "output_cost_per_token": 0.000000125, + "max_output_tokens": 2048, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "text-bison@001": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison@002": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison32k": { "max_tokens": 1024, "max_input_tokens": 8192, "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison32k@002": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1113,6 +1147,8 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1123,6 +1159,8 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1133,6 +1171,8 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1143,6 +1183,20 @@ "max_output_tokens": 8192, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "chat-bison-32k@002": { + "max_tokens": 8192, + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1153,6 +1207,8 @@ "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-text-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1163,6 +1219,44 @@ "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-bison@002": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-bison32k": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-bison-32k@002": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1197,12 +1291,36 @@ "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "code-gecko-latest": { + "max_tokens": 64, + "max_input_tokens": 2048, + "max_output_tokens": 64, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "codechat-bison@latest": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, "codechat-bison": { "max_tokens": 1024, "max_input_tokens": 6144, "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1213,6 +1331,20 @@ "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "codechat-bison@002": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1223,6 +1355,20 @@ "max_output_tokens": 8192, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "codechat-bison-32k@002": { + "max_tokens": 8192, + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1272,6 +1418,36 @@ "supports_function_calling": true, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "gemini-1.0-ultra": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 2048, + "input_cost_per_image": 0.0025, + "input_cost_per_video_per_second": 0.002, + "input_cost_per_token": 0.0000005, + "input_cost_per_character": 0.000000125, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 0.000000375, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.0-ultra-001": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 2048, + "input_cost_per_image": 0.0025, + "input_cost_per_video_per_second": 0.002, + "input_cost_per_token": 0.0000005, + "input_cost_per_character": 0.000000125, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 0.000000375, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, "gemini-1.0-pro-002": { "max_tokens": 8192, "max_input_tokens": 32760, diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py index c9e5501a8..901d68ef3 100644 --- a/litellm/tests/test_amazing_vertex_completion.py +++ b/litellm/tests/test_amazing_vertex_completion.py @@ -329,11 +329,14 @@ def test_vertex_ai(): "code-gecko@001", "code-gecko@002", "code-gecko@latest", + "codechat-bison@latest", "code-bison@001", "text-bison@001", "gemini-1.5-pro", "gemini-1.5-pro-preview-0215", - ]: + ] or ( + "gecko" in model or "32k" in model or "ultra" in model or "002" in model + ): # our account does not have access to this model continue print("making request", model) @@ -381,12 +384,15 @@ def test_vertex_ai_stream(): "code-gecko@001", "code-gecko@002", "code-gecko@latest", + "codechat-bison@latest", "code-bison@001", "text-bison@001", "gemini-1.5-pro", "gemini-1.5-pro-preview-0215", - ]: - # ouraccount does not have access to this model + ] or ( + "gecko" in model or "32k" in model or "ultra" in model or "002" in model + ): + # our account does not have access to this model continue print("making request", model) response = completion( @@ -433,11 +439,12 @@ async def test_async_vertexai_response(): "code-gecko@001", "code-gecko@002", "code-gecko@latest", + "codechat-bison@latest", "code-bison@001", "text-bison@001", "gemini-1.5-pro", "gemini-1.5-pro-preview-0215", - ]: + ] or ("gecko" in model or "32k" in model or "ultra" in model or "002" in model): # our account does not have access to this model continue try: @@ -479,11 +486,12 @@ async def test_async_vertexai_streaming_response(): "code-gecko@001", "code-gecko@002", "code-gecko@latest", + "codechat-bison@latest", "code-bison@001", "text-bison@001", "gemini-1.5-pro", "gemini-1.5-pro-preview-0215", - ]: + ] or ("gecko" in model or "32k" in model or "ultra" in model or "002" in model): # our account does not have access to this model continue try: diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 6b15084a9..415041dcb 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -1068,21 +1068,55 @@ "tool_use_system_prompt_tokens": 159 }, "text-bison": { - "max_tokens": 1024, + "max_tokens": 2048, "max_input_tokens": 8192, - "max_output_tokens": 1024, - "input_cost_per_token": 0.000000125, - "output_cost_per_token": 0.000000125, + "max_output_tokens": 2048, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, "text-bison@001": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison@002": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison32k": { "max_tokens": 1024, "max_input_tokens": 8192, "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "text-bison32k@002": { + "max_tokens": 1024, + "max_input_tokens": 8192, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1113,6 +1147,8 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1123,6 +1159,8 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1133,6 +1171,8 @@ "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1143,6 +1183,20 @@ "max_output_tokens": 8192, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "chat-bison-32k@002": { + "max_tokens": 8192, + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1153,6 +1207,8 @@ "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-text-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1163,6 +1219,44 @@ "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-bison@002": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-bison32k": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "code-bison-32k@002": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1197,12 +1291,36 @@ "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "code-gecko-latest": { + "max_tokens": 64, + "max_input_tokens": 2048, + "max_output_tokens": 64, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "codechat-bison@latest": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, "codechat-bison": { "max_tokens": 1024, "max_input_tokens": 6144, "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1213,6 +1331,20 @@ "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "codechat-bison@002": { + "max_tokens": 1024, + "max_input_tokens": 6144, + "max_output_tokens": 1024, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1223,6 +1355,20 @@ "max_output_tokens": 8192, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, + "litellm_provider": "vertex_ai-code-chat-models", + "mode": "chat", + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "codechat-bison-32k@002": { + "max_tokens": 8192, + "max_input_tokens": 32000, + "max_output_tokens": 8192, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "input_cost_per_character": 0.00000025, + "output_cost_per_character": 0.0000005, "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -1272,6 +1418,36 @@ "supports_function_calling": true, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" }, + "gemini-1.0-ultra": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 2048, + "input_cost_per_image": 0.0025, + "input_cost_per_video_per_second": 0.002, + "input_cost_per_token": 0.0000005, + "input_cost_per_character": 0.000000125, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 0.000000375, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, + "gemini-1.0-ultra-001": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 2048, + "input_cost_per_image": 0.0025, + "input_cost_per_video_per_second": 0.002, + "input_cost_per_token": 0.0000005, + "input_cost_per_character": 0.000000125, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 0.000000375, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat", + "supports_function_calling": true, + "source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + }, "gemini-1.0-pro-002": { "max_tokens": 8192, "max_input_tokens": 32760,