forked from phoenix/litellm-mirror

fix latency issues on google ai studio

commit d69552718d
parent ddfe687b13

1 changed file with 15 additions and 14 deletions
@@ -6,7 +6,11 @@ import httpx
 import litellm
 from litellm.caching.caching import Cache, LiteLLMCacheType
 from litellm.litellm_core_utils.litellm_logging import Logging
-from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.llms.custom_httpx.http_handler import (
+    AsyncHTTPHandler,
+    HTTPHandler,
+    get_async_httpx_client,
+)
 from litellm.llms.OpenAI.openai import AllMessageValues
 from litellm.types.llms.vertex_ai import (
     CachedContentListAllResponseBody,
@@ -331,6 +335,13 @@ class ContextCachingEndpoints(VertexBase):
         if cached_content is not None:
             return messages, cached_content
 
+        cached_messages, non_cached_messages = separate_cached_messages(
+            messages=messages
+        )
+
+        if len(cached_messages) == 0:
+            return messages, None
+
         ## AUTHORIZATION ##
         token, url = self._get_token_and_url_context_caching(
             gemini_api_key=api_key,
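The hunk above moves the separate_cached_messages check ahead of the authorization and client-setup steps, so a request with nothing to cache returns before any token or HTTP work happens. A minimal, self-contained sketch of that early-exit ordering; fetch_token, create_cache, and the cache_control criterion below are illustrative stand-ins, not litellm APIs:

    from typing import Dict, List, Optional, Tuple

    # Hypothetical stand-ins for the authorization and cache-creation calls;
    # used only to illustrate the ordering, not litellm's real API.
    def fetch_token() -> str:
        return "fake-token"

    def create_cache(token: str, cacheable: List[Dict]) -> str:
        return "cachedContents/fake-id"

    def check_and_create_cache(messages: List[Dict]) -> Tuple[List[Dict], Optional[str]]:
        # Split out the messages marked as cacheable first (mirrors
        # separate_cached_messages in the hunk above).
        cacheable = [m for m in messages if m.get("cache_control")]  # illustrative criterion

        # Early exit: nothing to cache, so skip token fetch and client setup entirely.
        if not cacheable:
            return messages, None

        # Only the caching path pays for authorization and the network round-trip.
        token = fetch_token()
        return messages, create_cache(token, cacheable)

    print(check_and_create_cache([{"role": "user", "content": "hi"}]))  # -> ([...], None)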
@@ -347,22 +358,12 @@ class ContextCachingEndpoints(VertexBase):
             headers.update(extra_headers)
 
         if client is None or not isinstance(client, AsyncHTTPHandler):
-            _params = {}
-            if timeout is not None:
-                if isinstance(timeout, float) or isinstance(timeout, int):
-                    timeout = httpx.Timeout(timeout)
-                _params["timeout"] = timeout
-            client = AsyncHTTPHandler(**_params)  # type: ignore
+            client = get_async_httpx_client(
+                params={"timeout": timeout}, llm_provider=litellm.LlmProviders.VERTEX_AI
+            )
         else:
             client = client
 
-        cached_messages, non_cached_messages = separate_cached_messages(
-            messages=messages
-        )
-
-        if len(cached_messages) == 0:
-            return messages, None
-
         ## CHECK IF CACHED ALREADY
         generated_cache_key = local_cache_obj.get_cache_key(messages=cached_messages)
         google_cache_name = await self.async_check_cache(
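The last hunk carries the main latency fix: instead of building a fresh AsyncHTTPHandler, and with it a new connection pool, on every cache check, the code obtains a client through get_async_httpx_client, which presumably lets litellm hand back a reusable client for the provider so TCP/TLS setup is not repeated per request. A minimal sketch of the underlying client-reuse technique with plain httpx, under that assumption; the shared-client helper below is illustrative, not litellm's implementation:

    import asyncio
    from typing import Optional

    import httpx

    # Reuse one AsyncClient (and its connection pool) for the whole process
    # instead of creating a new client, and new TCP/TLS connections, per call.
    _shared_client: Optional[httpx.AsyncClient] = None

    def get_shared_client(timeout: float = 30.0) -> httpx.AsyncClient:
        global _shared_client
        if _shared_client is None:
            _shared_client = httpx.AsyncClient(timeout=timeout)
        return _shared_client

    async def fetch(url: str) -> int:
        # Every caller goes through the shared pool; only the first call
        # pays for client construction and connection setup.
        resp = await get_shared_client().get(url)
        return resp.status_code

    async def main() -> None:
        codes = await asyncio.gather(*(fetch("https://example.com") for _ in range(3)))
        print(codes)

    asyncio.run(main())

Under that reading, the removed AsyncHTTPHandler(**_params) path re-created the client on every call, which is consistent with the "fix latency issues on google ai studio" description in the commit message.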