From b9e1d6035799d6be25bdaada460bae9c57c47c00 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Thu, 8 Aug 2024 15:32:07 -0700
Subject: [PATCH] add native cachedContents endpoint

---
 litellm/llms/fine_tuning_apis/vertex_ai.py    |  2 ++
 .../vertex_ai_endpoints/vertex_endpoints.py   | 27 +++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/litellm/llms/fine_tuning_apis/vertex_ai.py b/litellm/llms/fine_tuning_apis/vertex_ai.py
index 5f96f04831..beebb0d565 100644
--- a/litellm/llms/fine_tuning_apis/vertex_ai.py
+++ b/litellm/llms/fine_tuning_apis/vertex_ai.py
@@ -278,6 +278,8 @@ class VertexFineTuningAPI(VertexLLM):
             url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
         elif "countTokens" in request_route:
             url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
+        elif "cachedContents" in request_route:  # routed to the v1beta1 API; the countTokens route above uses v1
+            url = f"https://{vertex_location}-aiplatform.googleapis.com/v1beta1/projects/{vertex_project}/locations/{vertex_location}{request_route}"
         else:
             raise ValueError(f"Unsupported Vertex AI request route: {request_route}")
         if self.async_handler is None:
diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py
index b8c04583c3..1bfb1c2a09 100644
--- a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py
+++ b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py
@@ -303,3 +303,30 @@ async def vertex_cancel_fine_tuning_job(
         return response
     except Exception as e:
         raise exception_handler(e) from e
+
+
+@router.post(
+    "/vertex-ai/cachedContents",
+    dependencies=[Depends(user_api_key_auth)],
+    tags=["Vertex AI endpoints"],
+)
+async def vertex_create_add_cached_content(
+    request: Request,
+    fastapi_response: Response,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
+    """
+    Pass-through endpoint for the Vertex AI `/cachedContents` API.
+
+    Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/context-cache/context-cache-create#create-context-cache-sample-drest
+
+    Uses the Vertex AI credentials on the proxy and forwards the request to the Vertex AI API.
+    """
+    try:
+        response = await execute_post_vertex_ai_request(
+            request=request,
+            route="/cachedContents",
+        )
+        return response
+    except Exception as e:
+        raise exception_handler(e) from e