Merge pull request #5619 from BerriAI/litellm_vertex_use_get_httpx_client

[Fix-Perf] Vertex AI cache httpx clients
This commit is contained in:
Ishaan Jaff 2024-09-10 13:59:39 -07:00 committed by GitHub
commit f3593aed68
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 19 additions and 38 deletions

View file

@@ -19,7 +19,11 @@ import litellm.litellm_core_utils
import litellm.litellm_core_utils.litellm_logging import litellm.litellm_core_utils.litellm_logging
from litellm import verbose_logger from litellm import verbose_logger
from litellm.litellm_core_utils.core_helpers import map_finish_reason from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
_get_async_httpx_client,
)
from litellm.llms.prompt_templates.factory import ( from litellm.llms.prompt_templates.factory import (
convert_url_to_base64, convert_url_to_base64,
response_schema_prompt, response_schema_prompt,
@@ -1286,13 +1290,11 @@ class VertexLLM(BaseLLM):
) -> Union[ModelResponse, CustomStreamWrapper]: ) -> Union[ModelResponse, CustomStreamWrapper]:
request_body = await async_transform_request_body(**data) # type: ignore request_body = await async_transform_request_body(**data) # type: ignore
_async_client_params = {}
if timeout:
_async_client_params["timeout"] = timeout
if client is None or not isinstance(client, AsyncHTTPHandler): if client is None or not isinstance(client, AsyncHTTPHandler):
_params = {} client = _get_async_httpx_client(params=_async_client_params)
if timeout is not None:
if isinstance(timeout, float) or isinstance(timeout, int):
timeout = httpx.Timeout(timeout)
_params["timeout"] = timeout
client = AsyncHTTPHandler(**_params) # type: ignore
else: else:
client = client # type: ignore client = client # type: ignore
## LOGGING ## LOGGING

View file

@@ -1,37 +1,16 @@
model_list: model_list:
- model_name: openai/* - model_name: gemini-vision
litellm_params: litellm_params:
model: openai/* model: vertex_ai/gemini-1.0-pro-vision-001
api_key: os.environ/OPENAI_API_KEY api_base: https://exampleopenaiendpoint-production.up.railway.app/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro-vision-001
model_info: vertex_project: "adroit-crow-413218"
id: "good-openai" vertex_location: "us-central1"
- model_name: openai/* vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json"
- model_name: fake-openai-endpoint
litellm_params: litellm_params:
model: openai/* model: openai/fake
api_key: os.environ/non-exsitent-env-var api_key: fake-key
tags: ["bad-model"] api_base: https://exampleopenaiendpoint-production.up.railway.app/
model_info:
id: "test-openai"
litellm_settings:
callbacks: ["otel"]
callback_settings:
otel:
message_logging: False
router_settings:
enable_tag_filtering: True # 👈 Key Change
general_settings: general_settings:
master_key: sk-1234 master_key: sk-1234
alerting: ["slack"]
spend_report_frequency: "1d"
litellm_settings:
success_callback: ["prometheus"]
failure_callback: ["prometheus"]