From 19474c3f0fc9d1e0d9328736657a66cf7994f71f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 11:46:39 -0700 Subject: [PATCH 01/15] support v1/projects/tuningJobs --- .../proxy/fine_tuning_endpoints/endpoints.py | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/litellm/proxy/fine_tuning_endpoints/endpoints.py b/litellm/proxy/fine_tuning_endpoints/endpoints.py index cda226b5a..c2d89dd25 100644 --- a/litellm/proxy/fine_tuning_endpoints/endpoints.py +++ b/litellm/proxy/fine_tuning_endpoints/endpoints.py @@ -429,3 +429,72 @@ async def retrieve_fine_tuning_job( param=getattr(e, "param", "None"), code=getattr(e, "status_code", 500), ) + + +@router.post( + "/v1/projects/tuningJobs", + dependencies=[Depends(user_api_key_auth)], + tags=["fine-tuning"], + summary="✨ (Enterprise) Create Fine-Tuning Jobs", +) +async def vertex_create_fine_tuning_job( + request: Request, + fastapi_response: Response, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + this is a pass through endpoint for the Vertex AI API. /tuningJobs endpoint + + it uses the vertex ai credentials on the proxy and forwards to vertex ai api + """ + try: + from litellm.fine_tuning.main import vertex_fine_tuning_apis_instance + from litellm.proxy.proxy_server import ( + add_litellm_data_to_request, + general_settings, + get_custom_headers, + premium_user, + proxy_config, + proxy_logging_obj, + version, + ) + + # get configs for custom_llm_provider + llm_provider_config = get_fine_tuning_provider_config( + custom_llm_provider="vertex_ai" + ) + + vertex_project = llm_provider_config.get("vertex_project", None) + vertex_location = llm_provider_config.get("vertex_location", None) + vertex_credentials = llm_provider_config.get("vertex_credentials", None) + request_data_json = await request.json() + response = await vertex_fine_tuning_apis_instance.pass_through_vertex_ai_fine_tuning_job( + request_data=request_data_json, + vertex_project=vertex_project, + vertex_location=vertex_location, + vertex_credentials=vertex_credentials, + ) + + return response + except Exception as e: + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.v1/projects/tuningJobs(): Exception occurred - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) + if isinstance(e, HTTPException): + raise ProxyException( + message=getattr(e, "message", str(e.detail)), + type=getattr(e, "type", "None"), + param=getattr(e, "param", "None"), + code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), + ) + else: + error_msg = f"{str(e)}" + raise ProxyException( + message=getattr(e, "message", error_msg), + type=getattr(e, "type", "None"), + param=getattr(e, "param", "None"), + code=getattr(e, "status_code", 500), + ) From 2e596c6c5b0ec8446745339f3c6a7351f727ad2f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 11:47:43 -0700 Subject: [PATCH 02/15] add support for pass through vertex ai ft jobs --- litellm/llms/fine_tuning_apis/vertex_ai.py | 42 ++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/litellm/llms/fine_tuning_apis/vertex_ai.py b/litellm/llms/fine_tuning_apis/vertex_ai.py index f370652d2..618894245 100644 --- a/litellm/llms/fine_tuning_apis/vertex_ai.py +++ b/litellm/llms/fine_tuning_apis/vertex_ai.py @@ -240,3 +240,45 @@ class VertexFineTuningAPI(VertexLLM): vertex_response ) return open_ai_response + + async def pass_through_vertex_ai_fine_tuning_job( + self, + request_data: dict, + vertex_project: str, + vertex_location: str, + vertex_credentials: str, + ): + auth_header, _ = self._get_token_and_url( + model="", + gemini_api_key=None, + vertex_credentials=vertex_credentials, + vertex_project=vertex_project, + vertex_location=vertex_location, + stream=False, + custom_llm_provider="vertex_ai_beta", + api_base="", + ) + + headers = { + "Authorization": f"Bearer {auth_header}", + "Content-Type": "application/json", + } + + fine_tuning_url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/tuningJobs" + + if self.async_handler is None: + raise ValueError("VertexAI Fine Tuning - async_handler is not initialized") + + response = await self.async_handler.post( + headers=headers, + url=fine_tuning_url, + json=request_data, # type: ignore + ) + + if response.status_code != 200: + raise Exception( + f"Error creating fine tuning job. Status code: {response.status_code}. Response: {response.text}" + ) + + response_json = response.json() + return response_json From af91740fbb0e3c9a9eede61d0303861cd9c811e8 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 11:56:43 -0700 Subject: [PATCH 03/15] docs native vertex ft endpoint --- docs/my-website/docs/fine_tuning.md | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/docs/my-website/docs/fine_tuning.md b/docs/my-website/docs/fine_tuning.md index c69f4c1e6..fd3cbc792 100644 --- a/docs/my-website/docs/fine_tuning.md +++ b/docs/my-website/docs/fine_tuning.md @@ -124,7 +124,7 @@ ft_job = await client.fine_tuning.jobs.create( ``` - + ```shell curl http://localhost:4000/v1/fine_tuning/jobs \ @@ -136,6 +136,28 @@ curl http://localhost:4000/v1/fine_tuning/jobs \ "training_file": "gs://cloud-samples-data/ai-platform/generative_ai/sft_train_data.jsonl" }' ``` + + + + +:::info + +Use this to create Fine tuning Jobs in [the Vertex AI API Format](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/tuning#create-tuning) + +::: + +```shell +curl http://localhost:4000/v1/projects/tuningJobs \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "baseModel": "gemini-1.0-pro-002", + "supervisedTuningSpec" : { + "training_dataset_uri": "gs://cloud-samples-data/ai-platform/generative_ai/sft_train_data.jsonl" + } +}' +``` + From af04bbeccaf713a4fe6beca531b1596737456f3c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 16:52:03 -0700 Subject: [PATCH 04/15] set native vertex endpoints --- litellm/proxy/proxy_server.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 0f57a5fd1..83126b954 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -213,6 +213,8 @@ from litellm.proxy.utils import ( send_email, update_spend, ) +from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import router as vertex_router +from litellm.proxy.vertex_ai_endpoints.vertex_endpoints import set_default_vertex_config from litellm.router import ( AssistantsTypedDict, Deployment, @@ -1818,6 +1820,10 @@ class ProxyConfig: files_config = config.get("files_settings", None) set_files_config(config=files_config) + ## default config for vertex ai routes + default_vertex_config = config.get("default_vertex_config", None) + set_default_vertex_config(config=default_vertex_config) + ## ROUTER SETTINGS (e.g. routing_strategy, ...) router_settings = config.get("router_settings", None) if router_settings and isinstance(router_settings, dict): @@ -9631,6 +9637,7 @@ def cleanup_router_config_variables(): app.include_router(router) app.include_router(fine_tuning_router) +app.include_router(vertex_router) app.include_router(health_router) app.include_router(key_management_router) app.include_router(internal_user_router) From 64c008045f2c0bd1812dcd85a7585b06bf0dfdd9 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 16:52:43 -0700 Subject: [PATCH 05/15] use native endpoints --- litellm/llms/fine_tuning_apis/vertex_ai.py | 9 +- .../proxy/fine_tuning_endpoints/endpoints.py | 69 ---------- litellm/proxy/proxy_config.yaml | 5 + .../vertex_ai_endpoints/vertex_endpoints.py | 120 ++++++++++++++++++ 4 files changed, 131 insertions(+), 72 deletions(-) create mode 100644 litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py diff --git a/litellm/llms/fine_tuning_apis/vertex_ai.py b/litellm/llms/fine_tuning_apis/vertex_ai.py index 618894245..189ace11a 100644 --- a/litellm/llms/fine_tuning_apis/vertex_ai.py +++ b/litellm/llms/fine_tuning_apis/vertex_ai.py @@ -241,12 +241,13 @@ class VertexFineTuningAPI(VertexLLM): ) return open_ai_response - async def pass_through_vertex_ai_fine_tuning_job( + async def pass_through_vertex_ai_POST_request( self, request_data: dict, vertex_project: str, vertex_location: str, vertex_credentials: str, + request_route: str, ): auth_header, _ = self._get_token_and_url( model="", @@ -264,14 +265,16 @@ class VertexFineTuningAPI(VertexLLM): "Content-Type": "application/json", } - fine_tuning_url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/tuningJobs" + url = None + if request_route == "tuningJobs": + url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/tuningJobs" if self.async_handler is None: raise ValueError("VertexAI Fine Tuning - async_handler is not initialized") response = await self.async_handler.post( headers=headers, - url=fine_tuning_url, + url=url, json=request_data, # type: ignore ) diff --git a/litellm/proxy/fine_tuning_endpoints/endpoints.py b/litellm/proxy/fine_tuning_endpoints/endpoints.py index c2d89dd25..cda226b5a 100644 --- a/litellm/proxy/fine_tuning_endpoints/endpoints.py +++ b/litellm/proxy/fine_tuning_endpoints/endpoints.py @@ -429,72 +429,3 @@ async def retrieve_fine_tuning_job( param=getattr(e, "param", "None"), code=getattr(e, "status_code", 500), ) - - -@router.post( - "/v1/projects/tuningJobs", - dependencies=[Depends(user_api_key_auth)], - tags=["fine-tuning"], - summary="✨ (Enterprise) Create Fine-Tuning Jobs", -) -async def vertex_create_fine_tuning_job( - request: Request, - fastapi_response: Response, - user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), -): - """ - this is a pass through endpoint for the Vertex AI API. /tuningJobs endpoint - - it uses the vertex ai credentials on the proxy and forwards to vertex ai api - """ - try: - from litellm.fine_tuning.main import vertex_fine_tuning_apis_instance - from litellm.proxy.proxy_server import ( - add_litellm_data_to_request, - general_settings, - get_custom_headers, - premium_user, - proxy_config, - proxy_logging_obj, - version, - ) - - # get configs for custom_llm_provider - llm_provider_config = get_fine_tuning_provider_config( - custom_llm_provider="vertex_ai" - ) - - vertex_project = llm_provider_config.get("vertex_project", None) - vertex_location = llm_provider_config.get("vertex_location", None) - vertex_credentials = llm_provider_config.get("vertex_credentials", None) - request_data_json = await request.json() - response = await vertex_fine_tuning_apis_instance.pass_through_vertex_ai_fine_tuning_job( - request_data=request_data_json, - vertex_project=vertex_project, - vertex_location=vertex_location, - vertex_credentials=vertex_credentials, - ) - - return response - except Exception as e: - verbose_proxy_logger.error( - "litellm.proxy.proxy_server.v1/projects/tuningJobs(): Exception occurred - {}".format( - str(e) - ) - ) - verbose_proxy_logger.debug(traceback.format_exc()) - if isinstance(e, HTTPException): - raise ProxyException( - message=getattr(e, "message", str(e.detail)), - type=getattr(e, "type", "None"), - param=getattr(e, "param", "None"), - code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), - ) - else: - error_msg = f"{str(e)}" - raise ProxyException( - message=getattr(e, "message", error_msg), - type=getattr(e, "type", "None"), - param=getattr(e, "param", "None"), - code=getattr(e, "status_code", 500), - ) diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index aa2bfc525..0750a3937 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -48,6 +48,11 @@ files_settings: - custom_llm_provider: openai api_key: os.environ/OPENAI_API_KEY +default_vertex_config: + vertex_project: "adroit-crow-413218" + vertex_location: "us-central1" + vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json" + general_settings: diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py new file mode 100644 index 000000000..be09a4932 --- /dev/null +++ b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py @@ -0,0 +1,120 @@ +import asyncio +import traceback +from datetime import datetime, timedelta, timezone +from typing import List, Optional + +import fastapi +import httpx +from fastapi import ( + APIRouter, + Depends, + File, + Form, + Header, + HTTPException, + Request, + Response, + UploadFile, + status, +) + +import litellm +from litellm._logging import verbose_proxy_logger +from litellm.batches.main import FileObject +from litellm.fine_tuning.main import vertex_fine_tuning_apis_instance +from litellm.proxy._types import * +from litellm.proxy.auth.user_api_key_auth import user_api_key_auth + +router = APIRouter() +default_vertex_config = None + + +def set_default_vertex_config(config): + global default_vertex_config + if config is None: + return + + if not isinstance(config, list): + raise ValueError("invalid files config, expected a list is not a list") + + for element in config: + if isinstance(element, dict): + for key, value in element.items(): + if isinstance(value, str) and value.startswith("os.environ/"): + element[key] = litellm.get_secret(value) + + default_vertex_config = config + + +def exception_handler(e: Exception): + verbose_proxy_logger.error( + "litellm.proxy.proxy_server.v1/projects/tuningJobs(): Exception occurred - {}".format( + str(e) + ) + ) + verbose_proxy_logger.debug(traceback.format_exc()) + if isinstance(e, HTTPException): + return ProxyException( + message=getattr(e, "message", str(e.detail)), + type=getattr(e, "type", "None"), + param=getattr(e, "param", "None"), + code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), + ) + else: + error_msg = f"{str(e)}" + return ProxyException( + message=getattr(e, "message", error_msg), + type=getattr(e, "type", "None"), + param=getattr(e, "param", "None"), + code=getattr(e, "status_code", 500), + ) + + +async def execute_post_vertex_ai_request( + request: Request, + route: str, +): + from litellm.fine_tuning.main import vertex_fine_tuning_apis_instance + + vertex_project = default_vertex_config.get("vertex_project", None) + vertex_location = default_vertex_config.get("vertex_location", None) + vertex_credentials = default_vertex_config.get("vertex_credentials", None) + request_data_json = await request.json() + + response = ( + await vertex_fine_tuning_apis_instance.pass_through_vertex_ai_POST_request( + request_data=request_data_json, + vertex_project=vertex_project, + vertex_location=vertex_location, + vertex_credentials=vertex_credentials, + request_route=route, + ) + ) + + return response + + +@router.post( + "/vertex-ai/tuningJobs", + dependencies=[Depends(user_api_key_auth)], + tags=["Vertex AI endpoints"], +) +async def vertex_create_fine_tuning_job( + request: Request, + fastapi_response: Response, + endpoint_name: str, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + this is a pass through endpoint for the Vertex AI API. /tuningJobs endpoint + + it uses the vertex ai credentials on the proxy and forwards to vertex ai api + """ + try: + response = await execute_post_vertex_ai_request( + request=request, + route="/tuningJobs", + ) + return response + except Exception as e: + raise exception_handler(e) from e From 8051781af015bbbfd2c52a80067bd7078cef8358 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 16:56:33 -0700 Subject: [PATCH 06/15] working code for vertex ai routes --- litellm/llms/fine_tuning_apis/vertex_ai.py | 2 +- .../proxy/vertex_ai_endpoints/vertex_endpoints.py | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/litellm/llms/fine_tuning_apis/vertex_ai.py b/litellm/llms/fine_tuning_apis/vertex_ai.py index 189ace11a..cb238b04b 100644 --- a/litellm/llms/fine_tuning_apis/vertex_ai.py +++ b/litellm/llms/fine_tuning_apis/vertex_ai.py @@ -266,7 +266,7 @@ class VertexFineTuningAPI(VertexLLM): } url = None - if request_route == "tuningJobs": + if request_route == "/tuningJobs": url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/tuningJobs" if self.async_handler is None: diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py index be09a4932..659459cec 100644 --- a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py +++ b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py @@ -34,14 +34,13 @@ def set_default_vertex_config(config): if config is None: return - if not isinstance(config, list): - raise ValueError("invalid files config, expected a list is not a list") + if not isinstance(config, dict): + raise ValueError("invalid config, vertex default config must be a dictionary") - for element in config: - if isinstance(element, dict): - for key, value in element.items(): - if isinstance(value, str) and value.startswith("os.environ/"): - element[key] = litellm.get_secret(value) + if isinstance(config, dict): + for key, value in config.items(): + if isinstance(value, str) and value.startswith("os.environ/"): + config[key] = litellm.get_secret(value) default_vertex_config = config @@ -102,7 +101,6 @@ async def execute_post_vertex_ai_request( async def vertex_create_fine_tuning_job( request: Request, fastapi_response: Response, - endpoint_name: str, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), ): """ From c98733863afed9154be4d91ccff6434ec8297e56 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 17:17:54 -0700 Subject: [PATCH 07/15] add vertex generateContent --- litellm/llms/fine_tuning_apis/vertex_ai.py | 4 + .../vertex_ai_endpoints/vertex_endpoints.py | 83 ++++++++++++++++++- 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/litellm/llms/fine_tuning_apis/vertex_ai.py b/litellm/llms/fine_tuning_apis/vertex_ai.py index cb238b04b..2d3e8d1c3 100644 --- a/litellm/llms/fine_tuning_apis/vertex_ai.py +++ b/litellm/llms/fine_tuning_apis/vertex_ai.py @@ -268,6 +268,10 @@ class VertexFineTuningAPI(VertexLLM): url = None if request_route == "/tuningJobs": url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/tuningJobs" + elif "/tuningJobs/" in request_route and "cancel" in request_route: + url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/tuningJobs{request_route}" + elif "generateContent" in request_route: + url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}" if self.async_handler is None: raise ValueError("VertexAI Fine Tuning - async_handler is not initialized") diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py index 659459cec..7ef552508 100644 --- a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py +++ b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py @@ -1,3 +1,4 @@ +import ast import asyncio import traceback from datetime import datetime, timedelta, timezone @@ -78,7 +79,21 @@ async def execute_post_vertex_ai_request( vertex_project = default_vertex_config.get("vertex_project", None) vertex_location = default_vertex_config.get("vertex_location", None) vertex_credentials = default_vertex_config.get("vertex_credentials", None) - request_data_json = await request.json() + + request_data_json = {} + body = await request.body() + body_str = body.decode() + if len(body_str) > 0: + try: + request_data_json = ast.literal_eval(body_str) + except: + request_data_json = json.loads(body_str) + + verbose_proxy_logger.debug( + "Request received by LiteLLM:\n{}".format( + json.dumps(request_data_json, indent=4) + ), + ) response = ( await vertex_fine_tuning_apis_instance.pass_through_vertex_ai_POST_request( @@ -93,6 +108,41 @@ async def execute_post_vertex_ai_request( return response +@router.post( + "/vertex-ai/publishers/google/models/{model_id:path}:generateContent", + dependencies=[Depends(user_api_key_auth)], + tags=["Vertex AI endpoints"], +) +async def vertex_generate_content( + request: Request, + fastapi_response: Response, + model_id: str, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + this is a pass through endpoint for the Vertex AI API. /generateContent endpoint + + Example Curl: + ``` + curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.5-flash-001:generateContent \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}' + ``` + + Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#rest + it uses the vertex ai credentials on the proxy and forwards to vertex ai api + """ + try: + response = await execute_post_vertex_ai_request( + request=request, + route=f"/publishers/google/models/{model_id}:generateContent", + ) + return response + except Exception as e: + raise exception_handler(e) from e + + @router.post( "/vertex-ai/tuningJobs", dependencies=[Depends(user_api_key_auth)], @@ -106,6 +156,8 @@ async def vertex_create_fine_tuning_job( """ this is a pass through endpoint for the Vertex AI API. /tuningJobs endpoint + Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/tuning + it uses the vertex ai credentials on the proxy and forwards to vertex ai api """ try: @@ -116,3 +168,32 @@ async def vertex_create_fine_tuning_job( return response except Exception as e: raise exception_handler(e) from e + + +@router.post( + "/vertex-ai/tuningJobs/{job_id:path}:cancel", + dependencies=[Depends(user_api_key_auth)], + tags=["Vertex AI endpoints"], +) +async def vertex_cancel_fine_tuning_job( + request: Request, + job_id: str, + fastapi_response: Response, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + this is a pass through endpoint for the Vertex AI API. tuningJobs/{job_id:path}:cancel + + Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/tuning#cancel_a_tuning_job + + it uses the vertex ai credentials on the proxy and forwards to vertex ai api + """ + try: + + response = await execute_post_vertex_ai_request( + request=request, + route=f"/tuningJobs/{job_id}:cancel", + ) + return response + except Exception as e: + raise exception_handler(e) from e From 2d5c57e5457c09ed902dc18914a65953da87983a Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 17:26:49 -0700 Subject: [PATCH 08/15] add vertex embeddings endpoints --- litellm/llms/fine_tuning_apis/vertex_ai.py | 3 +- .../vertex_ai_endpoints/vertex_endpoints.py | 39 +++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/litellm/llms/fine_tuning_apis/vertex_ai.py b/litellm/llms/fine_tuning_apis/vertex_ai.py index 2d3e8d1c3..f964e7bba 100644 --- a/litellm/llms/fine_tuning_apis/vertex_ai.py +++ b/litellm/llms/fine_tuning_apis/vertex_ai.py @@ -272,7 +272,8 @@ class VertexFineTuningAPI(VertexLLM): url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/tuningJobs{request_route}" elif "generateContent" in request_route: url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}" - + elif "predict" in request_route: + url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}" if self.async_handler is None: raise ValueError("VertexAI Fine Tuning - async_handler is not initialized") diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py index 7ef552508..da63aa025 100644 --- a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py +++ b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py @@ -143,6 +143,45 @@ async def vertex_generate_content( raise exception_handler(e) from e +@router.post( + "/vertex-ai/publishers/google/models/{model_id:path}:predict", + dependencies=[Depends(user_api_key_auth)], + tags=["Vertex AI endpoints"], +) +async def vertex_predict_endpoint( + request: Request, + fastapi_response: Response, + model_id: str, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + this is a pass through endpoint for the Vertex AI API. /predict endpoint + Use this for: + - Embeddings API - Text Embedding, Multi Modal Embedding + - Imagen API + - Code Completion API + + Example Curl: + ``` + curl http://localhost:4000/vertex-ai/publishers/google/models/textembedding-gecko@001:predict \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{"instances":[{"content": "gm"}]}' + ``` + + Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api#generative-ai-get-text-embedding-drest + it uses the vertex ai credentials on the proxy and forwards to vertex ai api + """ + try: + response = await execute_post_vertex_ai_request( + request=request, + route=f"/publishers/google/models/{model_id}:predict", + ) + return response + except Exception as e: + raise exception_handler(e) from e + + @router.post( "/vertex-ai/tuningJobs", dependencies=[Depends(user_api_key_auth)], From c8438715af437a336573808ee913660d0dbad130 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 17:34:10 -0700 Subject: [PATCH 09/15] add vertex ai countTokens endpoint --- litellm/llms/fine_tuning_apis/vertex_ai.py | 5 ++ .../vertex_ai_endpoints/vertex_endpoints.py | 63 +++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/litellm/llms/fine_tuning_apis/vertex_ai.py b/litellm/llms/fine_tuning_apis/vertex_ai.py index f964e7bba..c24deca94 100644 --- a/litellm/llms/fine_tuning_apis/vertex_ai.py +++ b/litellm/llms/fine_tuning_apis/vertex_ai.py @@ -274,6 +274,11 @@ class VertexFineTuningAPI(VertexLLM): url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}" elif "predict" in request_route: url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}" + elif "/batchPredictionJobs" in request_route: + url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}" + elif "countTokens" in request_route: + url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}" + if self.async_handler is None: raise ValueError("VertexAI Fine Tuning - async_handler is not initialized") diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py index da63aa025..c7ce354b2 100644 --- a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py +++ b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py @@ -182,6 +182,69 @@ async def vertex_predict_endpoint( raise exception_handler(e) from e +@router.post( + "/vertex-ai/publishers/google/models/{model_id:path}:countTokens", + dependencies=[Depends(user_api_key_auth)], + tags=["Vertex AI endpoints"], +) +async def vertex_countTokens_endpoint( + request: Request, + fastapi_response: Response, + model_id: str, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + this is a pass through endpoint for the Vertex AI API. /countTokens endpoint + https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/count-tokens#curl + + + Example Curl: + ``` + curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.5-flash-001:countTokens \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}' + ``` + + it uses the vertex ai credentials on the proxy and forwards to vertex ai api + """ + try: + response = await execute_post_vertex_ai_request( + request=request, + route=f"/publishers/google/models/{model_id}:countTokens", + ) + return response + except Exception as e: + raise exception_handler(e) from e + + +@router.post( + "/vertex-ai/batchPredictionJobs", + dependencies=[Depends(user_api_key_auth)], + tags=["Vertex AI endpoints"], +) +async def vertex_create_batch_prediction_job( + request: Request, + fastapi_response: Response, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + this is a pass through endpoint for the Vertex AI API. /batchPredictionJobs endpoint + + Vertex API Reference: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/batch-prediction-api#syntax + + it uses the vertex ai credentials on the proxy and forwards to vertex ai api + """ + try: + response = await execute_post_vertex_ai_request( + request=request, + route="/batchPredictionJobs", + ) + return response + except Exception as e: + raise exception_handler(e) from e + + @router.post( "/vertex-ai/tuningJobs", dependencies=[Depends(user_api_key_auth)], From 94e5d0f73437086e4b5f8864f9f80873bcd58a20 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 17:41:45 -0700 Subject: [PATCH 10/15] docs add vertex ai endpoints --- docs/my-website/docs/vertex_ai.md | 40 +++++++++++++++++++++++++++++++ docs/my-website/sidebars.js | 5 ++-- 2 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 docs/my-website/docs/vertex_ai.md diff --git a/docs/my-website/docs/vertex_ai.md b/docs/my-website/docs/vertex_ai.md new file mode 100644 index 000000000..c18044c20 --- /dev/null +++ b/docs/my-website/docs/vertex_ai.md @@ -0,0 +1,40 @@ +# [BETA] Vertex AI Endpoints + +## Supported APIs + +- Gemini API +- Embeddings API +- Imagen API +- Code Completion API +- Batch prediction API +- Tuning API +- CountTokens API + +## Quick Start Usage + +#### 1. Set `default_vertex_config` on your `config.yaml` + + +Add the following credentials to your litellm config.yaml to use the Vertex AI endpoints. + +```yaml +default_vertex_config: + vertex_project: "adroit-crow-413218" + vertex_location: "us-central1" + vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json" # Add path to service account.json +``` + +#### 2. Start litellm proxy + +```shell +litellm --config /path/to/config.yaml +``` + +#### 3. Test it + +```shell +curl http://localhost:4000/vertex-ai/publishers/google/models/textembedding-gecko@001:countTokens \ +-H "Content-Type: application/json" \ +-H "Authorization: Bearer sk-1234" \ +-d '{"instances":[{"content": "gm"}]}' +``` \ No newline at end of file diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 69fd32cb3..a57a9aecd 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -110,7 +110,7 @@ const sidebars = { }, { type: "category", - label: "Embedding(), Image Generation(), Assistants(), Moderation(), Audio Transcriptions(), TTS(), Batches(), Fine-Tuning()", + label: "Supported Endpoints - /images, /audio/speech, /assistants etc", items: [ "embedding/supported_embedding", "embedding/async_embedding", @@ -121,7 +121,8 @@ const sidebars = { "assistants", "batches", "fine_tuning", - "anthropic_completion" + "anthropic_completion", + "vertex_ai" ], }, { From 50c9fa38f902b54e6cd8f98823636fe7203a3cd7 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 17:44:28 -0700 Subject: [PATCH 11/15] docs link to vertex ai endpoints --- docs/my-website/docs/proxy/user_keys.md | 3 +++ docs/my-website/docs/vertex_ai.md | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/my-website/docs/proxy/user_keys.md b/docs/my-website/docs/proxy/user_keys.md index 75e547d17..79d019a20 100644 --- a/docs/my-website/docs/proxy/user_keys.md +++ b/docs/my-website/docs/proxy/user_keys.md @@ -23,6 +23,9 @@ LiteLLM Proxy is **Azure OpenAI-compatible**: LiteLLM Proxy is **Anthropic-compatible**: * /messages +LiteLLM Proxy is **Vertex AI compatible**: +- [Supports ALL Vertex Endpoints](../vertex_ai) + This doc covers: * /chat/completion diff --git a/docs/my-website/docs/vertex_ai.md b/docs/my-website/docs/vertex_ai.md index c18044c20..2c9a6279a 100644 --- a/docs/my-website/docs/vertex_ai.md +++ b/docs/my-website/docs/vertex_ai.md @@ -1,6 +1,6 @@ # [BETA] Vertex AI Endpoints -## Supported APIs +## Supported API Endpoints - Gemini API - Embeddings API From 7f95a865fefe925b5d98ab7c46d8e433cb02dec1 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 17:56:38 -0700 Subject: [PATCH 12/15] docs add example curl command --- docs/my-website/docs/vertex_ai.md | 37 +++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/docs/my-website/docs/vertex_ai.md b/docs/my-website/docs/vertex_ai.md index 2c9a6279a..7ae06f3af 100644 --- a/docs/my-website/docs/vertex_ai.md +++ b/docs/my-website/docs/vertex_ai.md @@ -37,4 +37,41 @@ curl http://localhost:4000/vertex-ai/publishers/google/models/textembedding-geck -H "Content-Type: application/json" \ -H "Authorization: Bearer sk-1234" \ -d '{"instances":[{"content": "gm"}]}' +``` + + +### Gemini API (Generate Content) + +```shell +curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.5-flash-001:generateContent \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}' +``` + +### Embeddings API + +```shell +curl http://localhost:4000/vertex-ai/publishers/google/models/textembedding-gecko@001:predict \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{"instances":[{"content": "gm"}]}' +``` + +### Imagen API + +```shell +curl http://localhost:4000/vertex-ai/publishers/google/models/imagen-3.0-generate-001:predict \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{"instances":[{"prompt": "make an otter"}], "parameters": {"sampleCount": 1}}' +``` + +### Count Tokens API + +```shell +curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.5-flash-001:countTokens \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}' ``` \ No newline at end of file From 9d2eab555ba679b0bd41594556d3f8184cc80bc3 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 17:58:20 -0700 Subject: [PATCH 13/15] docs tuning api --- docs/my-website/docs/vertex_ai.md | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/docs/my-website/docs/vertex_ai.md b/docs/my-website/docs/vertex_ai.md index 7ae06f3af..d9c8616a0 100644 --- a/docs/my-website/docs/vertex_ai.md +++ b/docs/my-website/docs/vertex_ai.md @@ -38,7 +38,7 @@ curl http://localhost:4000/vertex-ai/publishers/google/models/textembedding-geck -H "Authorization: Bearer sk-1234" \ -d '{"instances":[{"content": "gm"}]}' ``` - +## Usage Examples ### Gemini API (Generate Content) @@ -74,4 +74,20 @@ curl http://localhost:4000/vertex-ai/publishers/google/models/gemini-1.5-flash-0 -H "Content-Type: application/json" \ -H "Authorization: Bearer sk-1234" \ -d '{"contents":[{"role": "user", "parts":[{"text": "hi"}]}]}' +``` + +### Tuning API + +Create Fine Tuning Job + +```shell +curl http://localhost:4000/vertex-ai/tuningJobs \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "baseModel": "gemini-1.0-pro-002", + "supervisedTuningSpec" : { + "training_dataset_uri": "gs://cloud-samples-data/ai-platform/generative_ai/sft_train_data.jsonl" + } +}' ``` \ No newline at end of file From 3a94aac34389762b7fb81b17ec2a1b9c9041a726 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 17:59:46 -0700 Subject: [PATCH 14/15] docs - fix merge conflicts --- docs/my-website/sidebars.js | 49 +++---------------------------------- 1 file changed, 3 insertions(+), 46 deletions(-) diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 65483e392..6f6bcfeea 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -90,50 +90,6 @@ const sidebars = { "proxy/cli", ] }, - { - type: "category", - label: "Completion()", - link: { - type: "generated-index", - title: "Completion()", - description: "Details on the completion() function", - slug: "/completion", - }, - items: [ - "completion/input", - "completion/provider_specific_params", - "completion/json_mode", - "completion/drop_params", - "completion/prompt_formatting", - "completion/output", - "exception_mapping", - "completion/stream", - "completion/message_trimming", - "completion/function_call", - "completion/vision", - "completion/model_alias", - "completion/batching", - "completion/mock_requests", - "completion/reliable_completions", - ], - }, - { - type: "category", - label: "Supported Endpoints - /images, /audio/speech, /assistants etc", - items: [ - "embedding/supported_embedding", - "embedding/async_embedding", - "embedding/moderation", - "image_generation", - "audio_transcription", - "text_to_speech", - "assistants", - "batches", - "fine_tuning", - "anthropic_completion", - "vertex_ai" - ], - }, { type: "category", label: "💯 Supported Models & Providers", @@ -222,7 +178,7 @@ const sidebars = { }, { type: "category", - label: "Embedding(), Image Generation(), Assistants(), Moderation(), Audio Transcriptions(), TTS(), Batches(), Fine-Tuning()", + label: "Supported Endpoints - /images, /audio/speech, /assistants etc", items: [ "embedding/supported_embedding", "embedding/async_embedding", @@ -233,7 +189,8 @@ const sidebars = { "assistants", "batches", "fine_tuning", - "anthropic_completion" + "anthropic_completion", + "vertex_ai" ], }, { From e73eb19678979e2b9e11ae62cbb9247cffb78c91 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 3 Aug 2024 18:09:48 -0700 Subject: [PATCH 15/15] docs default vertex --- litellm/llms/fine_tuning_apis/vertex_ai.py | 3 ++- litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/litellm/llms/fine_tuning_apis/vertex_ai.py b/litellm/llms/fine_tuning_apis/vertex_ai.py index c24deca94..5f96f0483 100644 --- a/litellm/llms/fine_tuning_apis/vertex_ai.py +++ b/litellm/llms/fine_tuning_apis/vertex_ai.py @@ -278,7 +278,8 @@ class VertexFineTuningAPI(VertexLLM): url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}" elif "countTokens" in request_route: url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}{request_route}" - + else: + raise ValueError(f"Unsupported Vertex AI request route: {request_route}") if self.async_handler is None: raise ValueError("VertexAI Fine Tuning - async_handler is not initialized") diff --git a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py index c7ce354b2..b8c04583c 100644 --- a/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py +++ b/litellm/proxy/vertex_ai_endpoints/vertex_endpoints.py @@ -76,6 +76,10 @@ async def execute_post_vertex_ai_request( ): from litellm.fine_tuning.main import vertex_fine_tuning_apis_instance + if default_vertex_config is None: + raise ValueError( + "Vertex credentials not added on litellm proxy, please add `default_vertex_config` on your config.yaml" + ) vertex_project = default_vertex_config.get("vertex_project", None) vertex_location = default_vertex_config.get("vertex_location", None) vertex_credentials = default_vertex_config.get("vertex_credentials", None)