forked from phoenix/litellm-mirror
fix(vertex_httpx.py): re-raise vertex content policy violation error
Fixes https://github.com/BerriAI/litellm/issues/4270
This commit is contained in: parent aef5cf3f22, commit f41c443abb
7 changed files with 309 additions and 37 deletions
litellm/llms/vertex_httpx.py

@@ -9,7 +9,7 @@ import types
 import uuid
 from enum import Enum
 from functools import partial
-from typing import Any, Callable, List, Literal, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union

 import httpx  # type: ignore
 import ijson
@@ -241,6 +241,20 @@ class VertexGeminiConfig:
             "europe-west9",
         ]

+    def get_flagged_finish_reasons(self) -> Dict[str, str]:
+        """
+        Return Dictionary of finish reasons which indicate response was flagged
+
+        and what it means
+        """
+        return {
+            "SAFETY": "The token generation was stopped as the response was flagged for safety reasons. NOTE: When streaming the Candidate.content will be empty if content filters blocked the output.",
+            "RECITATION": "The token generation was stopped as the response was flagged for unauthorized citations.",
+            "BLOCKLIST": "The token generation was stopped as the response was flagged for the terms which are included from the terminology blocklist.",
+            "PROHIBITED_CONTENT": "The token generation was stopped as the response was flagged for the prohibited contents.",
+            "SPII": "The token generation was stopped as the response was flagged for Sensitive Personally Identifiable Information (SPII) contents.",
+        }
+

 async def make_call(
     client: Optional[AsyncHTTPHandler],
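For orientation, a minimal sketch of how this new mapping can be consulted against a raw Gemini response. The sample `candidates` payload is illustrative, and the import assumes `VertexGeminiConfig` stays importable from `litellm.llms.vertex_httpx` as in this commit:

```python
from litellm.llms.vertex_httpx import VertexGeminiConfig

# Illustrative raw Vertex AI response: the candidate was blocked for recitation.
completion_response = {"candidates": [{"finishReason": "RECITATION"}]}

flagged = VertexGeminiConfig().get_flagged_finish_reasons()
finish_reason = completion_response["candidates"][0].get("finishReason")
if finish_reason in flagged:
    # Human-readable explanation of why generation stopped.
    print("Blocked: {}".format(flagged[finish_reason]))
```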
@@ -362,6 +376,27 @@ class VertexLLM(BaseLLM):
                 status_code=422,
             )

+        ## CHECK IF RESPONSE FLAGGED
+        if len(completion_response["candidates"]) > 0:
+            content_policy_violations = (
+                VertexGeminiConfig().get_flagged_finish_reasons()
+            )
+            if (
+                "finishReason" in completion_response["candidates"][0]
+                and completion_response["candidates"][0]["finishReason"]
+                in content_policy_violations.keys()
+            ):
+                ## CONTENT POLICY VIOLATION ERROR
+                raise VertexAIError(
+                    status_code=400,
+                    message="The response was blocked. Reason={}. Raw Response={}".format(
+                        content_policy_violations[
+                            completion_response["candidates"][0]["finishReason"]
+                        ],
+                        completion_response,
+                    ),
+                )
+
         model_response.choices = []  # type: ignore

         ## GET MODEL ##
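Downstream (see the litellm/utils.py hunks later in this commit), the "The response was blocked." prefix of this message is what `exception_type` matches on to re-raise a `litellm.ContentPolicyViolationError`. A caller-side sketch, with placeholder model and prompt:

```python
import litellm

try:
    resp = litellm.completion(
        model="vertex_ai_beta/gemini-1.5-flash",  # placeholder model name
        messages=[{"role": "user", "content": "..."}],  # placeholder prompt
    )
except litellm.ContentPolicyViolationError as e:
    # Raised when Vertex AI flags the response (SAFETY, RECITATION, BLOCKLIST, ...).
    print(e.message)
```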
@@ -804,6 +839,7 @@ class VertexLLM(BaseLLM):
             client = HTTPHandler(**_params)  # type: ignore
         else:
             client = client

         try:
             response = client.post(url=url, headers=headers, json=data)  # type: ignore
             response.raise_for_status()
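The sync path keeps the usual httpx pattern: `raise_for_status()` turns non-2xx replies into `httpx.HTTPStatusError`, which the surrounding handler converts into a `VertexAIError`. A standalone sketch of that pattern, using `httpx.MockTransport` to fake a non-2xx reply (URL and payload are illustrative):

```python
import httpx

# Simulate a 429 from the API without any network I/O.
transport = httpx.MockTransport(lambda request: httpx.Response(429, text="rate limited"))
client = httpx.Client(transport=transport)

try:
    response = client.post("https://example.test/v1:generateContent", json={"contents": []})
    response.raise_for_status()
except httpx.HTTPStatusError as err:
    # In vertex_httpx.py this is where a VertexAIError would be raised.
    print(err.response.status_code, err.response.text)
```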
litellm/main.py

@@ -1928,6 +1928,7 @@ def completion(
                 acompletion=acompletion,
                 timeout=timeout,
                 custom_llm_provider=custom_llm_provider,
+                client=client,
             )

         elif custom_llm_provider == "vertex_ai":
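With `client=` now forwarded by `completion()` for the vertex_ai_beta path, a pre-built handler can be reused across calls (and, as the test below shows, patched in unit tests). A sketch assuming the `HTTPHandler` import path used elsewhere in this commit:

```python
from litellm import completion
from litellm.llms.custom_httpx.http_handler import HTTPHandler

client = HTTPHandler()  # reusable sync httpx wrapper

response = completion(
    model="vertex_ai_beta/gemini-1.5-flash",
    messages=[{"role": "user", "content": "hello"}],
    client=client,  # forwarded to the Vertex AI httpx handler
)
```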
litellm/model_prices_and_context_window_backup.json

@@ -1185,6 +1185,33 @@
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "gemini-1.5-flash": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 0.0001315,
+        "input_cost_per_video_per_second": 0.0001315,
+        "input_cost_per_audio_per_second": 0.000125,
+        "input_cost_per_token": 0.00000003125,
+        "input_cost_per_token_above_128k_tokens": 0.0000000625,
+        "output_cost_per_token": 0.00000009375,
+        "output_cost_per_token_above_128k_tokens": 0.0000001875,
+        "output_cost_per_image": 0.000263,
+        "output_cost_per_video_per_second": 0.000263,
+        "output_cost_per_audio_per_second": 0.00025,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "gemini-1.5-flash-001": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
@@ -1207,6 +1234,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1233,6 +1261,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1253,6 +1282,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1273,6 +1303,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1293,6 +1324,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
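As a sanity check on the new per-token prices, a rough cost estimate for a gemini-1.5-flash call under the 128k-token threshold (a sketch only; litellm's own cost tracking is the source of truth):

```python
# Prices copied from the gemini-1.5-flash entry above (per token, <=128k context).
INPUT_COST_PER_TOKEN = 0.00000003125
OUTPUT_COST_PER_TOKEN = 0.00000009375

prompt_tokens, completion_tokens = 10_000, 1_000
cost = prompt_tokens * INPUT_COST_PER_TOKEN + completion_tokens * OUTPUT_COST_PER_TOKEN
print(f"${cost:.8f}")  # $0.00040625
```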
litellm/tests/test_amazing_vertex_completion.py

@@ -15,6 +15,7 @@ import asyncio
 import json
 import os
 import tempfile
+from unittest.mock import MagicMock, patch

 import pytest
@@ -695,37 +696,161 @@ async def test_gemini_pro_function_calling_httpx(provider, sync_mode):
         pytest.fail("An unexpected exception occurred - {}".format(str(e)))


-@pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
+# @pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
+def vertex_httpx_mock_post(url, data=None, json=None, headers=None):
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.headers = {"Content-Type": "application/json"}
+    mock_response.json.return_value = {
+        "candidates": [
+            {
+                "finishReason": "RECITATION",
+                "safetyRatings": [
+                    {
+                        "category": "HARM_CATEGORY_HATE_SPEECH",
+                        "probability": "NEGLIGIBLE",
+                        "probabilityScore": 0.14965563,
+                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
+                        "severityScore": 0.13660839,
+                    },
+                    {
+                        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+                        "probability": "NEGLIGIBLE",
+                        "probabilityScore": 0.16344544,
+                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
+                        "severityScore": 0.10230471,
+                    },
+                    {
+                        "category": "HARM_CATEGORY_HARASSMENT",
+                        "probability": "NEGLIGIBLE",
+                        "probabilityScore": 0.1979091,
+                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
+                        "severityScore": 0.06052939,
+                    },
+                    {
+                        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+                        "probability": "NEGLIGIBLE",
+                        "probabilityScore": 0.1765296,
+                        "severity": "HARM_SEVERITY_NEGLIGIBLE",
+                        "severityScore": 0.18417984,
+                    },
+                ],
+                "citationMetadata": {
+                    "citations": [
+                        {
+                            "startIndex": 251,
+                            "endIndex": 380,
+                            "uri": "https://chocolatecake2023.blogspot.com/2023/02/taste-deliciousness-of-perfectly-baked.html?m=1",
+                        },
+                        {
+                            "startIndex": 393,
+                            "endIndex": 535,
+                            "uri": "https://skinnymixes.co.uk/blogs/food-recipes/peanut-butter-cup-cookies",
+                        },
+                        {
+                            "startIndex": 439,
+                            "endIndex": 581,
+                            "uri": "https://mast-producing-trees.org/aldis-chocolate-chips-are-peanut-and-tree-nut-free/",
+                        },
+                        {
+                            "startIndex": 1117,
+                            "endIndex": 1265,
+                            "uri": "https://github.com/frdrck100/To_Do_Assignments",
+                        },
+                        {
+                            "startIndex": 1146,
+                            "endIndex": 1288,
+                            "uri": "https://skinnymixes.co.uk/blogs/food-recipes/peanut-butter-cup-cookies",
+                        },
+                        {
+                            "startIndex": 1166,
+                            "endIndex": 1299,
+                            "uri": "https://www.girlversusdough.com/brookies/",
+                        },
+                        {
+                            "startIndex": 1780,
+                            "endIndex": 1909,
+                            "uri": "https://chocolatecake2023.blogspot.com/2023/02/taste-deliciousness-of-perfectly-baked.html?m=1",
+                        },
+                        {
+                            "startIndex": 1834,
+                            "endIndex": 1964,
+                            "uri": "https://newsd.in/national-cream-cheese-brownie-day-2023-date-history-how-to-make-a-cream-cheese-brownie/",
+                        },
+                        {
+                            "startIndex": 1846,
+                            "endIndex": 1989,
+                            "uri": "https://github.com/frdrck100/To_Do_Assignments",
+                        },
+                        {
+                            "startIndex": 2121,
+                            "endIndex": 2261,
+                            "uri": "https://recipes.net/copycat/hardee/hardees-chocolate-chip-cookie-recipe/",
+                        },
+                        {
+                            "startIndex": 2505,
+                            "endIndex": 2671,
+                            "uri": "https://www.tfrecipes.com/Oranges%20with%20dried%20cherries/",
+                        },
+                        {
+                            "startIndex": 3390,
+                            "endIndex": 3529,
+                            "uri": "https://github.com/quantumcognition/Crud-palm",
+                        },
+                        {
+                            "startIndex": 3568,
+                            "endIndex": 3724,
+                            "uri": "https://recipes.net/dessert/cakes/ultimate-easy-gingerbread/",
+                        },
+                        {
+                            "startIndex": 3640,
+                            "endIndex": 3770,
+                            "uri": "https://recipes.net/dessert/cookies/soft-and-chewy-peanut-butter-cookies/",
+                        },
+                    ]
+                },
+            }
+        ],
+        "usageMetadata": {"promptTokenCount": 336, "totalTokenCount": 336},
+    }
+    return mock_response
+
+
 @pytest.mark.parametrize("provider", ["vertex_ai_beta"])  # "vertex_ai",
 @pytest.mark.asyncio
-async def test_gemini_pro_json_schema_httpx(provider):
+async def test_gemini_pro_json_schema_httpx_content_policy_error(provider):
     load_vertex_ai_credentials()
     litellm.set_verbose = True
     messages = [
         {
             "role": "user",
             "content": """
-        List 5 popular cookie recipes.
-
-        Using this JSON schema:
+        List 5 popular cookie recipes.
+
         Recipe = {"recipe_name": str}

         Return a `list[Recipe]`
+        Using this JSON schema:
+        ```json
+        {'$defs': {'Recipe': {'properties': {'recipe_name': {'examples': ['Chocolate Chip Cookies', 'Peanut Butter Cookies'], 'maxLength': 100, 'title': 'The recipe name', 'type': 'string'}, 'estimated_time': {'anyOf': [{'minimum': 0, 'type': 'integer'}, {'type': 'null'}], 'default': None, 'description': 'The estimated time to make the recipe in minutes', 'examples': [30, 45], 'title': 'The estimated time'}, 'ingredients': {'examples': [['flour', 'sugar', 'chocolate chips'], ['peanut butter', 'sugar', 'eggs']], 'items': {'type': 'string'}, 'maxItems': 10, 'title': 'The ingredients', 'type': 'array'}, 'instructions': {'examples': [['mix', 'bake'], ['mix', 'chill', 'bake']], 'items': {'type': 'string'}, 'maxItems': 10, 'title': 'The instructions', 'type': 'array'}}, 'required': ['recipe_name', 'ingredients', 'instructions'], 'title': 'Recipe', 'type': 'object'}}, 'properties': {'recipes': {'items': {'$ref': '#/$defs/Recipe'}, 'maxItems': 11, 'title': 'The recipes', 'type': 'array'}}, 'required': ['recipes'], 'title': 'MyRecipes', 'type': 'object'}
+        ```
     """,
         }
     ]
+    from litellm.llms.custom_httpx.http_handler import HTTPHandler
+
+    client = HTTPHandler()
+
+    with patch.object(client, "post", side_effect=vertex_httpx_mock_post) as mock_call:
+        try:
+            response = completion(
-                model="vertex_ai_beta/gemini-1.5-flash-preview-0514",
+                model="vertex_ai_beta/gemini-1.5-flash",
                 messages=messages,
                 response_format={"type": "json_object"},
+                client=client,
             )
+        except litellm.ContentPolicyViolationError as e:
+            pass

-    assert response.choices[0].message.content is not None
-    response_json = json.loads(response.choices[0].message.content)
-
-    assert isinstance(response_json, dict) or isinstance(response_json, list)
+        mock_call.assert_called_once()


 @pytest.mark.skip(reason="exhausted vertex quota. need to refactor to mock the call")
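A tighter variant of the same test would assert the mapped exception directly instead of swallowing it. A sketch reusing `vertex_httpx_mock_post` and the imports above (not part of this commit):

```python
import pytest


def test_content_policy_error_is_raised():
    client = HTTPHandler()
    with patch.object(client, "post", side_effect=vertex_httpx_mock_post):
        with pytest.raises(litellm.ContentPolicyViolationError):
            completion(
                model="vertex_ai_beta/gemini-1.5-flash",
                messages=[{"role": "user", "content": "List 5 cookie recipes"}],
                client=client,
            )
```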
litellm/tests/test_exceptions.py

@@ -1,26 +1,26 @@
-from openai import AuthenticationError, BadRequestError, RateLimitError, OpenAIError
+import asyncio
 import os
+import subprocess
 import sys
 import traceback
-import subprocess, asyncio
 from typing import Any

+from openai import AuthenticationError, BadRequestError, OpenAIError, RateLimitError
+
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-import litellm
-from litellm import (
-    embedding,
-    completion,
-    # AuthenticationError,
-    ContextWindowExceededError,
-    # RateLimitError,
-    # ServiceUnavailableError,
-    # OpenAIError,
-)
 from concurrent.futures import ThreadPoolExecutor
+from unittest.mock import MagicMock, patch

 import pytest
-from unittest.mock import patch, MagicMock
+
+import litellm
+from litellm import (  # AuthenticationError,; RateLimitError,; ServiceUnavailableError,; OpenAIError,
+    ContextWindowExceededError,
+    completion,
+    embedding,
+)

 litellm.vertex_project = "pathrise-convert-1606954137718"
 litellm.vertex_location = "us-central1"
@@ -252,6 +252,7 @@ def test_completion_azure_exception():
 async def asynctest_completion_azure_exception():
     try:
         import openai
+
         import litellm

         print("azure gpt-3.5 test\n\n")
@@ -283,8 +284,11 @@ async def asynctest_completion_azure_exception():
 def asynctest_completion_openai_exception_bad_model():
     try:
+        import asyncio
+
         import openai
-        import litellm, asyncio
+
+        import litellm

         print("azure exception bad model\n\n")
         litellm.set_verbose = True
@@ -311,8 +315,11 @@ def asynctest_completion_openai_exception_bad_model():
 def asynctest_completion_azure_exception_bad_model():
     try:
+        import asyncio
+
         import openai
-        import litellm, asyncio
+
+        import litellm

         print("azure exception bad model\n\n")
         litellm.set_verbose = True
@@ -663,7 +670,7 @@ def test_litellm_predibase_exception():
 # print(f"accuracy_score: {accuracy_score}")


-@pytest.mark.parametrize("provider", ["predibase"])
+@pytest.mark.parametrize("provider", ["predibase", "vertex_ai_beta"])
 def test_exception_mapping(provider):
     """
     For predibase, run through a set of mock exceptions
litellm/utils.py

@@ -6235,7 +6235,11 @@ def exception_type(
                     llm_provider="sagemaker",
                     response=original_exception.response,
                 )
-        elif custom_llm_provider == "vertex_ai":
+        elif (
+            custom_llm_provider == "vertex_ai"
+            or custom_llm_provider == "vertex_ai_beta"
+            or custom_llm_provider == "gemini"
+        ):
             if (
                 "Vertex AI API has not been used in project" in error_str
                 or "Unable to find your project" in error_str
@@ -6254,6 +6258,13 @@ def exception_type(
                     ),
                     litellm_debug_info=extra_information,
                 )
+            if "400 Request payload size exceeds" in error_str:
+                exception_mapping_worked = True
+                raise ContextWindowExceededError(
+                    message=f"VertexException - {error_str}",
+                    model=model,
+                    llm_provider=custom_llm_provider,
+                )
             elif (
                 "None Unknown Error." in error_str
                 or "Content has no parts." in error_str
@@ -6287,13 +6298,13 @@ def exception_type(
                 )
             elif "The response was blocked." in error_str:
                 exception_mapping_worked = True
-                raise UnprocessableEntityError(
-                    message=f"VertexAIException UnprocessableEntityError - {error_str}",
+                raise ContentPolicyViolationError(
+                    message=f"VertexAIException ContentPolicyViolationError - {error_str}",
                     model=model,
                     llm_provider="vertex_ai",
                     litellm_debug_info=extra_information,
                     response=httpx.Response(
-                        status_code=422,
+                        status_code=400,
                         request=httpx.Request(
                             method="POST",
                             url=" https://cloud.google.com/vertex-ai/",
@@ -6345,6 +6356,27 @@ def exception_type(
                         ),
                     ),
                 )
+            if original_exception.status_code == 401:
+                exception_mapping_worked = True
+                raise AuthenticationError(
+                    message=f"VertexAIException - {original_exception.message}",
+                    llm_provider=custom_llm_provider,
+                    model=model,
+                )
+            if original_exception.status_code == 404:
+                exception_mapping_worked = True
+                raise NotFoundError(
+                    message=f"VertexAIException - {original_exception.message}",
+                    llm_provider=custom_llm_provider,
+                    model=model,
+                )
+            if original_exception.status_code == 408:
+                exception_mapping_worked = True
+                raise Timeout(
+                    message=f"VertexAIException - {original_exception.message}",
+                    llm_provider=custom_llm_provider,
+                    model=model,
+                )

             if original_exception.status_code == 429:
                 exception_mapping_worked = True
@@ -6374,6 +6406,13 @@ def exception_type(
                         request=httpx.Request(method="completion", url="https://github.com/BerriAI/litellm"),  # type: ignore
                     ),
                 )
+            if original_exception.status_code == 503:
+                exception_mapping_worked = True
+                raise ServiceUnavailableError(
+                    message=f"VertexAIException - {original_exception.message}",
+                    llm_provider=custom_llm_provider,
+                    model=model,
+                )
         elif custom_llm_provider == "palm" or custom_llm_provider == "gemini":
             if "503 Getting metadata" in error_str:
                 # auth errors look like this
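Taken together, the new branches give vertex_ai / vertex_ai_beta / gemini a fuller status-code map. A caller-side sketch of provider-agnostic handling (model name is a placeholder, and the exception classes are litellm's public re-exports):

```python
import litellm

try:
    litellm.completion(
        model="vertex_ai_beta/gemini-1.5-flash",  # placeholder
        messages=[{"role": "user", "content": "hi"}],
    )
except litellm.ContentPolicyViolationError:
    pass  # "The response was blocked." now maps here (400-class)
except litellm.ContextWindowExceededError:
    pass  # "400 Request payload size exceeds" maps here
except (litellm.AuthenticationError, litellm.NotFoundError, litellm.Timeout):
    pass  # mapped from provider 401 / 404 / 408 status codes
except litellm.ServiceUnavailableError:
    pass  # mapped from provider 503
```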
model_prices_and_context_window.json

@@ -1185,6 +1185,33 @@
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
     },
+    "gemini-1.5-flash": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_image": 0.0001315,
+        "input_cost_per_video_per_second": 0.0001315,
+        "input_cost_per_audio_per_second": 0.000125,
+        "input_cost_per_token": 0.00000003125,
+        "input_cost_per_token_above_128k_tokens": 0.0000000625,
+        "output_cost_per_token": 0.00000009375,
+        "output_cost_per_token_above_128k_tokens": 0.0000001875,
+        "output_cost_per_image": 0.000263,
+        "output_cost_per_video_per_second": 0.000263,
+        "output_cost_per_audio_per_second": 0.00025,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    },
     "gemini-1.5-flash-001": {
         "max_tokens": 8192,
         "max_input_tokens": 1000000,
@@ -1207,6 +1234,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1233,6 +1261,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1253,6 +1282,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1273,6 +1303,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
@@ -1293,6 +1324,7 @@
         "output_cost_per_audio_per_second": 0.00025,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat",
+        "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_tool_choice": true,
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"