diff --git a/.gitignore b/.gitignore
index d760ba17f4..d35923f7c3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -77,3 +77,5 @@ litellm/proxy/_experimental/out/404.html
 litellm/proxy/_experimental/out/model_hub.html
 .mypy_cache/*
 litellm/proxy/application.log
+tests/llm_translation/vertex_test_account.json
+tests/llm_translation/test_vertex_key.json
diff --git a/litellm/llms/vertex_ai/batches/handler.py b/litellm/llms/vertex_ai/batches/handler.py
index 0274cd5b05..3d723d8ecf 100644
--- a/litellm/llms/vertex_ai/batches/handler.py
+++ b/litellm/llms/vertex_ai/batches/handler.py
@@ -10,7 +10,10 @@ from litellm.llms.custom_httpx.http_handler import (
 )
 from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM
 from litellm.types.llms.openai import Batch, CreateBatchRequest
-from litellm.types.llms.vertex_ai import VertexAIBatchPredictionJob
+from litellm.types.llms.vertex_ai import (
+    VERTEX_CREDENTIALS_TYPES,
+    VertexAIBatchPredictionJob,
+)
 
 from .transformation import VertexAIBatchTransformation
 
@@ -25,7 +28,7 @@ class VertexAIBatchPrediction(VertexLLM):
         _is_async: bool,
         create_batch_data: CreateBatchRequest,
         api_base: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
         timeout: Union[float, httpx.Timeout],
@@ -130,7 +133,7 @@ class VertexAIBatchPrediction(VertexLLM):
         _is_async: bool,
         batch_id: str,
         api_base: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
         timeout: Union[float, httpx.Timeout],
diff --git a/litellm/llms/vertex_ai/files/handler.py b/litellm/llms/vertex_ai/files/handler.py
index 4bae106045..266169cdfb 100644
--- a/litellm/llms/vertex_ai/files/handler.py
+++ b/litellm/llms/vertex_ai/files/handler.py
@@ -9,6 +9,7 @@ from litellm.integrations.gcs_bucket.gcs_bucket_base import (
 )
 from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
 from litellm.types.llms.openai import CreateFileRequest, FileObject
+from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
 
 from .transformation import VertexAIFilesTransformation
 
@@ -34,7 +35,7 @@ class VertexAIFilesHandler(GCSBucketBase):
         self,
         create_file_data: CreateFileRequest,
         api_base: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
         timeout: Union[float, httpx.Timeout],
@@ -70,7 +71,7 @@ class VertexAIFilesHandler(GCSBucketBase):
         _is_async: bool,
         create_file_data: CreateFileRequest,
         api_base: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
         timeout: Union[float, httpx.Timeout],
diff --git a/litellm/llms/vertex_ai/fine_tuning/handler.py b/litellm/llms/vertex_ai/fine_tuning/handler.py
index 8564b8cb69..3cf409c78e 100644
--- a/litellm/llms/vertex_ai/fine_tuning/handler.py
+++ b/litellm/llms/vertex_ai/fine_tuning/handler.py
@@ -13,6 +13,7 @@ from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import Ver
 from litellm.types.fine_tuning import OpenAIFineTuningHyperparameters
 from litellm.types.llms.openai import FineTuningJobCreate
 from litellm.types.llms.vertex_ai import (
+    VERTEX_CREDENTIALS_TYPES,
     FineTuneHyperparameters,
     FineTuneJobCreate,
     FineTunesupervisedTuningSpec,
@@ -222,7 +223,7 @@ class VertexFineTuningAPI(VertexLLM):
         create_fine_tuning_job_data: FineTuningJobCreate,
         vertex_project: Optional[str],
         vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         api_base: Optional[str],
         timeout: Union[float, httpx.Timeout],
         kwargs: Optional[dict] = None,
diff --git a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
index dff63ce148..a87b5f3a2a 100644
--- a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
+++ b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
@@ -40,6 +40,7 @@ from litellm.types.llms.openai import (
     ChatCompletionUsageBlock,
 )
 from litellm.types.llms.vertex_ai import (
+    VERTEX_CREDENTIALS_TYPES,
     Candidates,
     ContentType,
     FunctionCallingConfig,
@@ -930,7 +931,7 @@ class VertexLLM(VertexBase):
         client: Optional[AsyncHTTPHandler] = None,
         vertex_project: Optional[str] = None,
         vertex_location: Optional[str] = None,
-        vertex_credentials: Optional[str] = None,
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
         gemini_api_key: Optional[str] = None,
         extra_headers: Optional[dict] = None,
     ) -> CustomStreamWrapper:
@@ -1018,7 +1019,7 @@ class VertexLLM(VertexBase):
         client: Optional[AsyncHTTPHandler] = None,
         vertex_project: Optional[str] = None,
         vertex_location: Optional[str] = None,
-        vertex_credentials: Optional[str] = None,
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
         gemini_api_key: Optional[str] = None,
         extra_headers: Optional[dict] = None,
     ) -> Union[ModelResponse, CustomStreamWrapper]:
@@ -1123,7 +1124,7 @@ class VertexLLM(VertexBase):
         timeout: Optional[Union[float, httpx.Timeout]],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         gemini_api_key: Optional[str],
         litellm_params: dict,
         logger_fn=None,
diff --git a/litellm/llms/vertex_ai/image_generation/image_generation_handler.py b/litellm/llms/vertex_ai/image_generation/image_generation_handler.py
index bb39fcb1ad..1d5322c08d 100644
--- a/litellm/llms/vertex_ai/image_generation/image_generation_handler.py
+++ b/litellm/llms/vertex_ai/image_generation/image_generation_handler.py
@@ -11,6 +11,7 @@ from litellm.llms.custom_httpx.http_handler import (
     get_async_httpx_client,
 )
 from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM
+from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
 from litellm.types.utils import ImageResponse
 
 
@@ -44,7 +45,7 @@ class VertexImageGeneration(VertexLLM):
         prompt: str,
         vertex_project: Optional[str],
         vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         model_response: ImageResponse,
         logging_obj: Any,
         model: Optional[
@@ -139,7 +140,7 @@ class VertexImageGeneration(VertexLLM):
         prompt: str,
         vertex_project: Optional[str],
         vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         model_response: litellm.ImageResponse,
         logging_obj: Any,
         model: Optional[
diff --git a/litellm/llms/vertex_ai/text_to_speech/text_to_speech_handler.py b/litellm/llms/vertex_ai/text_to_speech/text_to_speech_handler.py
index 10c73e815c..18bc72db46 100644
--- a/litellm/llms/vertex_ai/text_to_speech/text_to_speech_handler.py
+++ b/litellm/llms/vertex_ai/text_to_speech/text_to_speech_handler.py
@@ -9,6 +9,7 @@ from litellm.llms.custom_httpx.http_handler import (
 )
 from litellm.llms.openai.openai import HttpxBinaryResponseContent
 from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM
+from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
 
 
 class VertexInput(TypedDict, total=False):
@@ -45,7 +46,7 @@ class VertexTextToSpeechAPI(VertexLLM):
         logging_obj,
         vertex_project: Optional[str],
         vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         api_base: Optional[str],
         timeout: Union[float, httpx.Timeout],
         model: str,
diff --git a/litellm/llms/vertex_ai/vertex_ai_partner_models/main.py b/litellm/llms/vertex_ai/vertex_ai_partner_models/main.py
index ad52472130..fb2393631b 100644
--- a/litellm/llms/vertex_ai/vertex_ai_partner_models/main.py
+++ b/litellm/llms/vertex_ai/vertex_ai_partner_models/main.py
@@ -160,7 +160,8 @@ class VertexAIPartnerModels(VertexBase):
                 url=default_api_base,
             )
 
-            model = model.split("@")[0]
+            if "codestral" in model or "mistral" in model:
+                model = model.split("@")[0]
 
             if "codestral" in model and litellm_params.get("text_completion") is True:
                 optional_params["model"] = model
diff --git a/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py b/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py
index 0f73db30a0..3ef40703e8 100644
--- a/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py
+++ b/litellm/llms/vertex_ai/vertex_embeddings/embedding_handler.py
@@ -41,7 +41,7 @@ class VertexEmbedding(VertexBase):
         client: Optional[Union[AsyncHTTPHandler, HTTPHandler]] = None,
         vertex_project: Optional[str] = None,
         vertex_location: Optional[str] = None,
-        vertex_credentials: Optional[str] = None,
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
         gemini_api_key: Optional[str] = None,
         extra_headers: Optional[dict] = None,
     ) -> EmbeddingResponse:
@@ -148,7 +148,7 @@ class VertexEmbedding(VertexBase):
         client: Optional[AsyncHTTPHandler] = None,
         vertex_project: Optional[str] = None,
         vertex_location: Optional[str] = None,
-        vertex_credentials: Optional[str] = None,
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
         gemini_api_key: Optional[str] = None,
         extra_headers: Optional[dict] = None,
         encoding=None,
diff --git a/litellm/llms/vertex_ai/vertex_llm_base.py b/litellm/llms/vertex_ai/vertex_llm_base.py
index 71346a2e01..8286cb515f 100644
--- a/litellm/llms/vertex_ai/vertex_llm_base.py
+++ b/litellm/llms/vertex_ai/vertex_llm_base.py
@@ -12,6 +12,7 @@ from litellm._logging import verbose_logger
 from litellm.litellm_core_utils.asyncify import asyncify
 from litellm.llms.base import BaseLLM
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
 
 from .common_utils import _get_gemini_url, _get_vertex_url, all_gemini_url_modes
 
@@ -34,7 +35,7 @@ class VertexBase(BaseLLM):
         return vertex_region or "us-central1"
 
     def load_auth(
-        self, credentials: Optional[str], project_id: Optional[str]
+        self, credentials: Optional[VERTEX_CREDENTIALS_TYPES], project_id: Optional[str]
     ) -> Tuple[Any, str]:
         import google.auth as google_auth
         from google.auth import identity_pool
@@ -42,29 +43,36 @@
             Request,  # type: ignore[import-untyped]
         )
 
-        if credentials is not None and isinstance(credentials, str):
+        if credentials is not None:
             import google.oauth2.service_account
 
-            verbose_logger.debug(
-                "Vertex: Loading vertex credentials from %s", credentials
-            )
-            verbose_logger.debug(
-                "Vertex: checking if credentials is a valid path, os.path.exists(%s)=%s, current dir %s",
-                credentials,
-                os.path.exists(credentials),
-                os.getcwd(),
-            )
+            if isinstance(credentials, str):
+                verbose_logger.debug(
+                    "Vertex: Loading vertex credentials from %s", credentials
+                )
+                verbose_logger.debug(
+                    "Vertex: checking if credentials is a valid path, os.path.exists(%s)=%s, current dir %s",
+                    credentials,
+                    os.path.exists(credentials),
+                    os.getcwd(),
+                )
 
-            try:
-                if os.path.exists(credentials):
-                    json_obj = json.load(open(credentials))
-                else:
-                    json_obj = json.loads(credentials)
-            except Exception:
-                raise Exception(
-                    "Unable to load vertex credentials from environment. Got={}".format(
-                        credentials
+                try:
+                    if os.path.exists(credentials):
+                        json_obj = json.load(open(credentials))
+                    else:
+                        json_obj = json.loads(credentials)
+                except Exception:
+                    raise Exception(
+                        "Unable to load vertex credentials from environment. Got={}".format(
+                            credentials
+                        )
                     )
+            elif isinstance(credentials, dict):
+                json_obj = credentials
+            else:
+                raise ValueError(
+                    "Invalid credentials type: {}".format(type(credentials))
                 )
 
             # Check if the JSON object contains Workload Identity Federation configuration
@@ -109,7 +117,7 @@ class VertexBase(BaseLLM):
 
     def _ensure_access_token(
         self,
-        credentials: Optional[str],
+        credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         project_id: Optional[str],
         custom_llm_provider: Literal[
             "vertex_ai", "vertex_ai_beta", "gemini"
@@ -202,7 +210,7 @@ class VertexBase(BaseLLM):
         gemini_api_key: Optional[str],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         stream: Optional[bool],
         custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"],
         api_base: Optional[str],
@@ -253,7 +261,7 @@ class VertexBase(BaseLLM):
 
     async def _ensure_access_token_async(
         self,
-        credentials: Optional[str],
+        credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         project_id: Optional[str],
         custom_llm_provider: Literal[
             "vertex_ai", "vertex_ai_beta", "gemini"
diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_passthrough_router.py b/litellm/proxy/vertex_ai_endpoints/vertex_passthrough_router.py
index fdba424765..0273a62047 100644
--- a/litellm/proxy/vertex_ai_endpoints/vertex_passthrough_router.py
+++ b/litellm/proxy/vertex_ai_endpoints/vertex_passthrough_router.py
@@ -6,6 +6,7 @@ from litellm._logging import verbose_proxy_logger
 from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import (
     VertexPassThroughCredentials,
 )
+from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
 
 
 class VertexPassThroughRouter:
@@ -58,7 +59,7 @@ class VertexPassThroughRouter:
         self,
         project_id: str,
         location: str,
-        vertex_credentials: str,
+        vertex_credentials: VERTEX_CREDENTIALS_TYPES,
     ):
         """
         Add the vertex credentials for the given project-id, location
diff --git a/litellm/types/llms/vertex_ai.py b/litellm/types/llms/vertex_ai.py
index d6440f3a42..7024909a34 100644
--- a/litellm/types/llms/vertex_ai.py
+++ b/litellm/types/llms/vertex_ai.py
@@ -481,3 +481,6 @@ class VertexBatchPredictionResponse(TypedDict, total=False):
     createTime: str
     updateTime: str
     modelVersionId: str
+
+
+VERTEX_CREDENTIALS_TYPES = Union[str, Dict[str, str]]
diff --git a/litellm/types/passthrough_endpoints/vertex_ai.py b/litellm/types/passthrough_endpoints/vertex_ai.py
index 3933aadcd4..9087119807 100644
--- a/litellm/types/passthrough_endpoints/vertex_ai.py
+++ b/litellm/types/passthrough_endpoints/vertex_ai.py
@@ -6,6 +6,8 @@ from typing import Optional
 
 from pydantic import BaseModel
 
+from ..llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
+
 
 class VertexPassThroughCredentials(BaseModel):
     # Example: vertex_project = "my-project-123"
@@ -15,4 +17,4 @@ class VertexPassThroughCredentials(BaseModel):
     vertex_location: Optional[str] = None
 
     # Example: vertex_credentials = "/path/to/credentials.json" or "os.environ/GOOGLE_CREDS"
-    vertex_credentials: Optional[str] = None
+    vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None
diff --git a/litellm/types/router.py b/litellm/types/router.py
index fc95bbc670..e2c92783da 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -18,6 +18,7 @@ from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 
 from ..exceptions import RateLimitError
 from .completion import CompletionRequest
 from .embedding import EmbeddingRequest
+from .llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
 from .utils import ModelResponse, ProviderSpecificModelInfo
 
@@ -171,7 +172,7 @@ class GenericLiteLLMParams(BaseModel):
     ## VERTEX AI ##
     vertex_project: Optional[str] = None
     vertex_location: Optional[str] = None
-    vertex_credentials: Optional[str] = None
+    vertex_credentials: Optional[Union[str, dict]] = None
     ## AWS BEDROCK / SAGEMAKER ##
     aws_access_key_id: Optional[str] = None
     aws_secret_access_key: Optional[str] = None
@@ -213,7 +214,7 @@ class GenericLiteLLMParams(BaseModel):
         ## VERTEX AI ##
         vertex_project: Optional[str] = None,
         vertex_location: Optional[str] = None,
-        vertex_credentials: Optional[str] = None,
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
         ## AWS BEDROCK / SAGEMAKER ##
         aws_access_key_id: Optional[str] = None,
         aws_secret_access_key: Optional[str] = None,
diff --git a/tests/local_testing/test_amazing_vertex_completion.py b/tests/local_testing/test_amazing_vertex_completion.py
index 0fd82ad7bf..02e0c9b2f1 100644
--- a/tests/local_testing/test_amazing_vertex_completion.py
+++ b/tests/local_testing/test_amazing_vertex_completion.py
@@ -1518,7 +1518,7 @@ async def test_gemini_pro_json_schema_args_sent_httpx(
         )
 
     elif resp is not None:
-        assert resp.model == model.split("/")[1].split("@")[0]
+        assert resp.model == model.split("/")[1]
 
 
 @pytest.mark.parametrize(
@@ -2740,7 +2740,7 @@ async def test_partner_models_httpx_ai21():
             "total_tokens": 194,
         },
         "meta": {"requestDurationMillis": 501},
-        "model": "jamba-1.5",
+        "model": "jamba-1.5-mini@001",
     }
 
     mock_response.json = return_val
@@ -2769,7 +2769,7 @@ async def test_partner_models_httpx_ai21():
         kwargs["data"] = json.loads(kwargs["data"])
 
         assert kwargs["data"] == {
-            "model": "jamba-1.5-mini",
+            "model": "jamba-1.5-mini@001",
             "messages": [
                 {
                     "role": "system",
@@ -3222,3 +3222,67 @@ def test_vertexai_code_gecko():
 
     for chunk in response:
         print(chunk)
+
+
+def vertex_ai_anthropic_thinking_mock_response(*args, **kwargs):
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.headers = {"Content-Type": "application/json"}
+    mock_response.json.return_value = {
+        "id": "msg_vrtx_011pL6Np3MKxXL3R8theMRJW",
+        "type": "message",
+        "role": "assistant",
+        "model": "claude-3-7-sonnet-20250219",
+        "content": [
+            {
+                "type": "thinking",
+                "thinking": 'This is a very simple and common greeting in programming and computing. "Hello, world!" is often the first program people write when learning a new programming language, where they create a program that outputs this phrase.\n\nI should respond in a friendly way and acknowledge this greeting. I can keep it simple and welcoming.',
+                "signature": "EugBCkYQAhgCIkAqCkezmsp8DG9Jjoc/CD7yXavPXVvP4TAuwjc/ZgHRIgroz5FzAYxic3CnNiW5w2fx/4+1f4ZYVxWJVLmrEA46EgwFsxbpN2jxMxjIzy0aDIAbMy9rW6B5lGVETCIw4r2UW0A7m5Df991SMSMPvHU9VdL8p9S/F2wajLnLVpl5tH89csm4NqnMpxnou61yKlCLldFGIto1Kvit5W1jqn2gx2dGIOyR4YaJ0c8AIFfQa5TIXf+EChVDzhPKLWZ8D/Q3gCGxBx+m/4dLI8HMZA8Ob3iCMI23eBKmh62FCWJGuA==",
+            },
+            {
+                "type": "text",
+                "text": "Hi there! 👋 \n\nIt's nice to meet you! \"Hello, world!\" is such a classic phrase in computing - it's often the first output from someone's very first program.\n\nHow are you doing today? Is there something specific I can help you with?",
+            },
+        ],
+        "stop_reason": "end_turn",
+        "stop_sequence": None,
+        "usage": {
+            "input_tokens": 39,
+            "cache_creation_input_tokens": 0,
+            "cache_read_input_tokens": 0,
+            "output_tokens": 134,
+        },
+    }
+
+    return mock_response
+
+
+def test_vertex_anthropic_completion():
+    from litellm import completion
+    from litellm.llms.custom_httpx.http_handler import HTTPHandler
+
+    client = HTTPHandler()
+
+    load_vertex_ai_credentials()
+
+    with patch.object(
+        client, "post", side_effect=vertex_ai_anthropic_thinking_mock_response
+    ):
+        response = completion(
+            model="vertex_ai/claude-3-7-sonnet@20250219",
+            messages=[{"role": "user", "content": "Hello, world!"}],
+            vertex_ai_location="us-east5",
+            vertex_ai_project="test-project",
+            thinking={"type": "enabled", "budget_tokens": 1024},
+            client=client,
+        )
+        print(response)
+        assert response.model == "claude-3-7-sonnet@20250219"
+        assert response._hidden_params["response_cost"] is not None
+        assert response._hidden_params["response_cost"] > 0
+
+        assert response.choices[0].message.reasoning_content is not None
+        assert isinstance(response.choices[0].message.reasoning_content, str)
+        assert response.choices[0].message.thinking_blocks is not None
+        assert isinstance(response.choices[0].message.thinking_blocks, list)
+        assert len(response.choices[0].message.thinking_blocks) > 0
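---
Reviewer notes (not part of the patch):

The central change widens `vertex_credentials` from `Optional[str]` (a file path or a JSON string) to `Optional[VERTEX_CREDENTIALS_TYPES]`, i.e. `Union[str, Dict[str, str]]`, and teaches `VertexBase.load_auth` to accept an already-parsed service-account dict. A minimal usage sketch of what this enables; the model name, file path, project, and location are illustrative placeholders, not values from this diff:

```python
# Hedged sketch: passing Vertex credentials as a parsed dict rather than
# a path or JSON string, which the widened type now permits.
import json

import litellm

with open("service_account.json") as f:  # placeholder path
    creds = json.load(f)  # a dict; previously this had to stay a str

response = litellm.completion(
    model="vertex_ai/gemini-1.5-pro",  # placeholder model
    messages=[{"role": "user", "content": "Hello"}],
    vertex_credentials=creds,
    vertex_project="my-project",  # placeholder project
    vertex_location="us-central1",
)
print(response.choices[0].message.content)
```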
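The `vertex_ai_partner_models/main.py` hunk stops stripping the `@version` suffix from every partner model and limits it to Mistral/Codestral IDs, which is why the updated tests now expect `jamba-1.5-mini@001` and `claude-3-7-sonnet@20250219` to pass through unchanged. A standalone sketch of that rule (`normalize_partner_model` is a hypothetical name, not a function in this diff):

```python
def normalize_partner_model(model: str) -> str:
    # Only Mistral/Codestral models drop their "@version" suffix; other
    # partner models (AI21 Jamba, Anthropic Claude) keep the versioned ID
    # so it reaches the Vertex endpoint intact.
    if "codestral" in model or "mistral" in model:
        return model.split("@")[0]
    return model


assert normalize_partner_model("codestral-2405@001") == "codestral-2405"
assert normalize_partner_model("jamba-1.5-mini@001") == "jamba-1.5-mini@001"
assert normalize_partner_model("claude-3-7-sonnet@20250219") == "claude-3-7-sonnet@20250219"
```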
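Finally, a short consumer-side sketch of the fields the new `test_vertex_anthropic_completion` asserts on, assuming `response` came from a `completion()` call with `thinking` enabled as in the test:

```python
message = response.choices[0].message

# Plain-text chain of thought extracted from the "thinking" content block.
print(message.reasoning_content)

# Raw thinking blocks, each carrying the provider's "signature" field.
for block in message.thinking_blocks or []:
    print(block["type"], str(block.get("thinking", ""))[:80])
```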