forked from phoenix/litellm-mirror
Merge pull request #4421 from BerriAI/litellm_add_vertex_models
[Feat] Add all Vertex AI Models
This commit is contained in:
commit
3dc578555c
3 changed files with 373 additions and 13 deletions
|
@ -1068,21 +1068,55 @@
|
|||
"tool_use_system_prompt_tokens": 159
|
||||
},
|
||||
"text-bison": {
|
||||
"max_tokens": 1024,
|
||||
"max_tokens": 2048,
|
||||
"max_input_tokens": 8192,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"max_output_tokens": 2048,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"text-bison@001": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 8192,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"text-bison@002": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 8192,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"text-bison32k": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 8192,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"text-bison32k@002": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 8192,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1113,6 +1147,8 @@
|
|||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1123,6 +1159,8 @@
|
|||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1133,6 +1171,8 @@
|
|||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1143,6 +1183,20 @@
|
|||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"chat-bison-32k@002": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 32000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1153,6 +1207,8 @@
|
|||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-text-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1163,6 +1219,44 @@
|
|||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"code-bison@002": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 6144,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"code-bison32k": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 6144,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"code-bison-32k@002": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 6144,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1197,12 +1291,36 @@
|
|||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"code-gecko-latest": {
|
||||
"max_tokens": 64,
|
||||
"max_input_tokens": 2048,
|
||||
"max_output_tokens": 64,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"litellm_provider": "vertex_ai-code-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"codechat-bison@latest": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 6144,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"codechat-bison": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 6144,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1213,6 +1331,20 @@
|
|||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"codechat-bison@002": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 6144,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1223,6 +1355,20 @@
|
|||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"codechat-bison-32k@002": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 32000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1272,6 +1418,36 @@
|
|||
"supports_function_calling": true,
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"gemini-1.0-ultra": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 8192,
|
||||
"max_output_tokens": 2048,
|
||||
"input_cost_per_image": 0.0025,
|
||||
"input_cost_per_video_per_second": 0.002,
|
||||
"input_cost_per_token": 0.0000005,
|
||||
"input_cost_per_character": 0.000000125,
|
||||
"output_cost_per_token": 0.0000015,
|
||||
"output_cost_per_character": 0.000000375,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"gemini-1.0-ultra-001": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 8192,
|
||||
"max_output_tokens": 2048,
|
||||
"input_cost_per_image": 0.0025,
|
||||
"input_cost_per_video_per_second": 0.002,
|
||||
"input_cost_per_token": 0.0000005,
|
||||
"input_cost_per_character": 0.000000125,
|
||||
"output_cost_per_token": 0.0000015,
|
||||
"output_cost_per_character": 0.000000375,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"gemini-1.0-pro-002": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 32760,
|
||||
|
|
|
@ -329,11 +329,14 @@ def test_vertex_ai():
|
|||
"code-gecko@001",
|
||||
"code-gecko@002",
|
||||
"code-gecko@latest",
|
||||
"codechat-bison@latest",
|
||||
"code-bison@001",
|
||||
"text-bison@001",
|
||||
"gemini-1.5-pro",
|
||||
"gemini-1.5-pro-preview-0215",
|
||||
]:
|
||||
] or (
|
||||
"gecko" in model or "32k" in model or "ultra" in model or "002" in model
|
||||
):
|
||||
# our account does not have access to this model
|
||||
continue
|
||||
print("making request", model)
|
||||
|
@ -381,12 +384,15 @@ def test_vertex_ai_stream():
|
|||
"code-gecko@001",
|
||||
"code-gecko@002",
|
||||
"code-gecko@latest",
|
||||
"codechat-bison@latest",
|
||||
"code-bison@001",
|
||||
"text-bison@001",
|
||||
"gemini-1.5-pro",
|
||||
"gemini-1.5-pro-preview-0215",
|
||||
]:
|
||||
# ouraccount does not have access to this model
|
||||
] or (
|
||||
"gecko" in model or "32k" in model or "ultra" in model or "002" in model
|
||||
):
|
||||
# our account does not have access to this model
|
||||
continue
|
||||
print("making request", model)
|
||||
response = completion(
|
||||
|
@ -433,11 +439,12 @@ async def test_async_vertexai_response():
|
|||
"code-gecko@001",
|
||||
"code-gecko@002",
|
||||
"code-gecko@latest",
|
||||
"codechat-bison@latest",
|
||||
"code-bison@001",
|
||||
"text-bison@001",
|
||||
"gemini-1.5-pro",
|
||||
"gemini-1.5-pro-preview-0215",
|
||||
]:
|
||||
] or ("gecko" in model or "32k" in model or "ultra" in model or "002" in model):
|
||||
# our account does not have access to this model
|
||||
continue
|
||||
try:
|
||||
|
@ -479,11 +486,12 @@ async def test_async_vertexai_streaming_response():
|
|||
"code-gecko@001",
|
||||
"code-gecko@002",
|
||||
"code-gecko@latest",
|
||||
"codechat-bison@latest",
|
||||
"code-bison@001",
|
||||
"text-bison@001",
|
||||
"gemini-1.5-pro",
|
||||
"gemini-1.5-pro-preview-0215",
|
||||
]:
|
||||
] or ("gecko" in model or "32k" in model or "ultra" in model or "002" in model):
|
||||
# our account does not have access to this model
|
||||
continue
|
||||
try:
|
||||
|
|
|
@ -1068,21 +1068,55 @@
|
|||
"tool_use_system_prompt_tokens": 159
|
||||
},
|
||||
"text-bison": {
|
||||
"max_tokens": 1024,
|
||||
"max_tokens": 2048,
|
||||
"max_input_tokens": 8192,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"max_output_tokens": 2048,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"text-bison@001": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 8192,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"text-bison@002": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 8192,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"text-bison32k": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 8192,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"text-bison32k@002": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 8192,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1113,6 +1147,8 @@
|
|||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1123,6 +1159,8 @@
|
|||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1133,6 +1171,8 @@
|
|||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1143,6 +1183,20 @@
|
|||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"chat-bison-32k@002": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 32000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1153,6 +1207,8 @@
|
|||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-text-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1163,6 +1219,44 @@
|
|||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"code-bison@002": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 6144,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"code-bison32k": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 6144,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"code-bison-32k@002": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 6144,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1197,12 +1291,36 @@
|
|||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"code-gecko-latest": {
|
||||
"max_tokens": 64,
|
||||
"max_input_tokens": 2048,
|
||||
"max_output_tokens": 64,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"litellm_provider": "vertex_ai-code-text-models",
|
||||
"mode": "completion",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"codechat-bison@latest": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 6144,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"codechat-bison": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 6144,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1213,6 +1331,20 @@
|
|||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"codechat-bison@002": {
|
||||
"max_tokens": 1024,
|
||||
"max_input_tokens": 6144,
|
||||
"max_output_tokens": 1024,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1223,6 +1355,20 @@
|
|||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"codechat-bison-32k@002": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 32000,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.000000125,
|
||||
"output_cost_per_token": 0.000000125,
|
||||
"input_cost_per_character": 0.00000025,
|
||||
"output_cost_per_character": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-code-chat-models",
|
||||
"mode": "chat",
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
|
@ -1272,6 +1418,36 @@
|
|||
"supports_function_calling": true,
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"gemini-1.0-ultra": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 8192,
|
||||
"max_output_tokens": 2048,
|
||||
"input_cost_per_image": 0.0025,
|
||||
"input_cost_per_video_per_second": 0.002,
|
||||
"input_cost_per_token": 0.0000005,
|
||||
"input_cost_per_character": 0.000000125,
|
||||
"output_cost_per_token": 0.0000015,
|
||||
"output_cost_per_character": 0.000000375,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"gemini-1.0-ultra-001": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 8192,
|
||||
"max_output_tokens": 2048,
|
||||
"input_cost_per_image": 0.0025,
|
||||
"input_cost_per_video_per_second": 0.002,
|
||||
"input_cost_per_token": 0.0000005,
|
||||
"input_cost_per_character": 0.000000125,
|
||||
"output_cost_per_token": 0.0000015,
|
||||
"output_cost_per_character": 0.000000375,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"source": "As of Jun, 2024. There is no available doc on vertex ai pricing gemini-1.0-ultra-001. Using gemini-1.0-pro pricing. Got max_tokens info here: https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
|
||||
},
|
||||
"gemini-1.0-pro-002": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 32760,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue