Merge pull request #5619 from BerriAI/litellm_vertex_use_get_httpx_client

[Fix-Perf] Vertex AI cache httpx clients
This commit is contained in:
Ishaan Jaff 2024-09-10 13:59:39 -07:00 committed by GitHub
commit f3593aed68
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 19 additions and 38 deletions

View file

@@ -19,7 +19,11 @@ import litellm.litellm_core_utils
import litellm.litellm_core_utils.litellm_logging import litellm.litellm_core_utils.litellm_logging
from litellm import verbose_logger from litellm import verbose_logger
from litellm.litellm_core_utils.core_helpers import map_finish_reason from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
_get_async_httpx_client,
)
from litellm.llms.prompt_templates.factory import ( from litellm.llms.prompt_templates.factory import (
convert_url_to_base64, convert_url_to_base64,
response_schema_prompt, response_schema_prompt,
@@ -1286,13 +1290,11 @@ class VertexLLM(BaseLLM):
) -> Union[ModelResponse, CustomStreamWrapper]: ) -> Union[ModelResponse, CustomStreamWrapper]:
request_body = await async_transform_request_body(**data) # type: ignore request_body = await async_transform_request_body(**data) # type: ignore
_async_client_params = {}
if timeout:
_async_client_params["timeout"] = timeout
if client is None or not isinstance(client, AsyncHTTPHandler): if client is None or not isinstance(client, AsyncHTTPHandler):
_params = {} client = _get_async_httpx_client(params=_async_client_params)
if timeout is not None:
if isinstance(timeout, float) or isinstance(timeout, int):
timeout = httpx.Timeout(timeout)
_params["timeout"] = timeout
client = AsyncHTTPHandler(**_params) # type: ignore
else: else:
client = client # type: ignore client = client # type: ignore
## LOGGING ## LOGGING

View file

@@ -1,37 +1,16 @@
model_list: model_list:
- model_name: openai/* - model_name: gemini-vision
litellm_params: litellm_params:
model: openai/* model: vertex_ai/gemini-1.0-pro-vision-001
api_key: os.environ/OPENAI_API_KEY api_base: https://exampleopenaiendpoint-production.up.railway.app/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro-vision-001
model_info: vertex_project: "adroit-crow-413218"
id: "good-openai" vertex_location: "us-central1"
- model_name: openai/* vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json"
- model_name: fake-openai-endpoint
litellm_params: litellm_params:
model: openai/* model: openai/fake
api_key: os.environ/non-exsitent-env-var api_key: fake-key
tags: ["bad-model"] api_base: https://exampleopenaiendpoint-production.up.railway.app/
model_info:
id: "test-openai"
litellm_settings:
callbacks: ["otel"]
callback_settings:
otel:
message_logging: False
router_settings:
enable_tag_filtering: True # 👈 Key Change
general_settings: general_settings:
master_key: sk-1234 master_key: sk-1234
alerting: ["slack"]
spend_report_frequency: "1d"
litellm_settings:
success_callback: ["prometheus"]
failure_callback: ["prometheus"]