(refactor) caching use LLMCachingHandler for async_get_cache and set_cache (#6208)

* use folder for caching
* fix importing caching
* fix clickhouse pyright
* fix linting
* fix correctly pass kwargs and args
* fix test case for embedding
* fix linting
* fix embedding caching logic
* fix refactor handle utils.py
* fix test_embedding_caching_azure_individual_items_reordered

This commit is contained in: parent 20e50d7002, commit 4d1b4beb3d

96 changed files with 690 additions and 489 deletions
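For code that imports the cache classes directly, the user-visible effect of this refactor is a module move: the single litellm/caching.py module becomes the litellm/caching/ package, with the existing classes now living in litellm/caching/caching.py. A minimal sketch of the updated import path, mirroring the documentation changes further down (the top-level litellm.cache behaviour itself is unchanged):

```python
import litellm

# previously: from litellm.caching import Cache
from litellm.caching.caching import Cache

# configure as before, e.g. in-memory, redis, s3 or disk
litellm.cache = Cache()
```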
@@ -10,7 +10,7 @@ sys.path.insert(
 import asyncio
 from litellm import Router, Timeout
 import time
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 import litellm
 import openai

@@ -10,7 +10,7 @@ sys.path.insert(
 import asyncio
 from litellm import Router, Timeout
 import time
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 import litellm

 litellm.cache = Cache(
@@ -3,7 +3,7 @@ import TabItem from '@theme/TabItem';

 # Caching - In-Memory, Redis, s3, Redis Semantic Cache, Disk

-[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/caching.py)
+[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm.caching.caching.py)

 :::info

@@ -31,7 +31,7 @@ For the hosted version you can setup your own Redis DB here: https://app.redisla
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache

 litellm.cache = Cache(type="redis", host=<host>, port=<port>, password=<password>)

@@ -68,7 +68,7 @@ AWS_SECRET_ACCESS_KEY = "WOl*****"
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache

 # pass s3-bucket name
 litellm.cache = Cache(type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2")

@@ -101,7 +101,7 @@ For the hosted version you can setup your own Redis DB here: https://app.redisla
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache

 random_number = random.randint(
     1, 100000

@@ -155,7 +155,7 @@ To set up a Qdrant cluster locally follow: https://qdrant.tech/documentation/qui
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache

 random_number = random.randint(
     1, 100000

@@ -210,7 +210,7 @@ assert response1.id == response2.id
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache()

 # Make completion calls

@@ -246,7 +246,7 @@ Then you can use the disk cache as follows.
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache(type="disk")

 # Make completion calls

@@ -422,7 +422,7 @@ def custom_get_cache_key(*args, **kwargs):

 Set your function as litellm.cache.get_cache_key
 ```python
-from litellm.caching import Cache
+from litellm.caching.caching import Cache

 cache = Cache(type="redis", host=os.environ['REDIS_HOST'], port=os.environ['REDIS_PORT'], password=os.environ['REDIS_PASSWORD'])

@@ -434,7 +434,7 @@ litellm.cache = cache # set litellm.cache to your cache
 ## How to write custom add/get cache functions
 ### 1. Init Cache
 ```python
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 cache = Cache()
 ```

@@ -6,7 +6,7 @@ Use api.litellm.ai for caching `completion()` and `embedding()` responses
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache(type="hosted") # init cache to use api.litellm.ai

 # Make completion calls

@@ -31,7 +31,7 @@ response2 = completion(
 import time
 import litellm
 from litellm import completion, embedding
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache(type="hosted")

 start_time = time.time()

@@ -53,7 +53,7 @@ LiteLLM can cache your streamed responses for you
 import litellm
 import time
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache

 litellm.cache = Cache(type="hosted")

@@ -13,7 +13,7 @@ Keys in the cache are `model`, the following example will lead to a cache hit
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache()

 # Make completion calls

@@ -35,7 +35,7 @@ response2 = completion(
 Add custom key-value pairs to your cache.

 ```python
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 cache = Cache()

 cache.add_cache(cache_key="test-key", result="1234")

@@ -50,7 +50,7 @@ LiteLLM can cache your streamed responses for you
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache()

 # Make completion calls

@@ -77,7 +77,7 @@ Keys in the cache are `model`, the following example will lead to a cache hit
 import time
 import litellm
 from litellm import embedding
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache()

 start_time = time.time()
@@ -49,13 +49,13 @@ litellm_settings:
   cache: true
   cache_params: # set cache params for redis
     type: redis
-    namespace: "litellm_caching"
+    namespace: "litellm.caching.caching"
 ```

 and keys will be stored like:

 ```
-litellm_caching:<hash>
+litellm.caching.caching:<hash>
 ```

 #### Redis Cluster

@@ -645,7 +645,7 @@ litellm_settings:
     host: "localhost" # The host address for the Redis cache. Required if type is "redis".
     port: 6379 # The port number for the Redis cache. Required if type is "redis".
     password: "your_password" # The password for the Redis cache. Required if type is "redis".
-    namespace: "litellm_caching" # namespace for redis cache
+    namespace: "litellm.caching.caching" # namespace for redis cache

     # Optional - Redis Cluster Settings
     redis_startup_nodes: [{"host": "127.0.0.1", "port": "7001"}]
@@ -25,7 +25,7 @@ from typing import Any, Dict, List, Literal, Optional, Union

 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata

@@ -6,7 +6,7 @@ import dotenv, os
 import requests

 from litellm.proxy._types import UserAPIKeyAuth
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache

 from typing import Literal, Union, Optional

@@ -13,7 +13,7 @@ sys.path.insert(
 ) # Adds the parent directory to the system path
 from typing import Optional, Literal, Union, Any
 import litellm, traceback, sys, uuid
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from fastapi import HTTPException

@@ -9,7 +9,7 @@

 from typing import Optional, Literal
 import litellm
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from litellm._logging import verbose_proxy_logger

@@ -10,7 +10,7 @@
 from typing import Optional, Literal
 import litellm
 from litellm.proxy.utils import PrismaClient
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth, LiteLLM_EndUserTable
 from litellm.integrations.custom_logger import CustomLogger
 from litellm._logging import verbose_proxy_logger

@@ -9,7 +9,7 @@

 from typing import Optional, Literal, Union
 import litellm, traceback, sys, uuid
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from fastapi import HTTPException

@@ -15,7 +15,7 @@ sys.path.insert(
 ) # Adds the parent directory to the system path
 from typing import Optional, Literal, Union
 import litellm, traceback, sys, uuid
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from fastapi import HTTPException

@@ -13,7 +13,7 @@ import traceback
 import sys
 import uuid
 import os
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from fastapi import HTTPException

@@ -12,7 +12,7 @@ sys.path.insert(
 ) # Adds the parent directory to the system path
 from typing import Optional, Literal, Union
 import litellm, traceback, sys, uuid
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from fastapi import HTTPException

@@ -12,7 +12,7 @@ sys.path.insert(
     0, os.path.abspath("../..")
 ) # Adds the parent directory to the system path
 from typing import Optional
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm._logging import verbose_proxy_logger
 import tempfile
@@ -7,7 +7,7 @@ import threading
 import os
 from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 from litellm._logging import (
     set_verbose,
     _turn_on_debug,

@@ -2,7 +2,6 @@ from concurrent.futures import FIRST_COMPLETED, ThreadPoolExecutor, wait
 from typing import List, Optional

 import litellm
-from litellm import completion
 from litellm._logging import print_verbose
 from litellm.utils import get_optional_params

@@ -108,7 +107,7 @@ def batch_completion(
             if "kwargs" in kwargs_modified:
                 original_kwargs = kwargs_modified.pop("kwargs")
             future = executor.submit(
-                completion, **kwargs_modified, **original_kwargs
+                litellm.completion, **kwargs_modified, **original_kwargs
             )
             completions.append(future)

@@ -156,7 +155,7 @@ def batch_completion_models(*args, **kwargs):
         with ThreadPoolExecutor(max_workers=len(models)) as executor:
             for model in models:
                 futures[model] = executor.submit(
-                    completion, *args, model=model, **kwargs
+                    litellm.completion, *args, model=model, **kwargs
                 )

             for model, future in sorted(

@@ -178,7 +177,9 @@ def batch_completion_models(*args, **kwargs):
                 ): # don't override deployment values e.g. model name, api base, etc.
                     deployment[key] = kwargs[key]
                 kwargs = {**deployment, **nested_kwargs}
-                futures[deployment["model"]] = executor.submit(completion, **kwargs)
+                futures[deployment["model"]] = executor.submit(
+                    litellm.completion, **kwargs
+                )

             while futures:
                 # wait for the first returned future

@@ -246,7 +247,7 @@ def batch_completion_models_all_responses(*args, **kwargs):

     with concurrent.futures.ThreadPoolExecutor(max_workers=len(models)) as executor:
         for idx, model in enumerate(models):
-            future = executor.submit(completion, *args, model=model, **kwargs)
+            future = executor.submit(litellm.completion, *args, model=model, **kwargs)
             if future.result() is not None:
                 responses.append(future.result())

@@ -212,7 +212,7 @@ class RedisCache(BaseCache):

         from litellm._service_logger import ServiceLogging

-        from ._redis import get_redis_client, get_redis_connection_pool
+        from .._redis import get_redis_client, get_redis_connection_pool

         redis_kwargs = {}
         if host is not None:

@@ -276,7 +276,7 @@ class RedisCache(BaseCache):
             )

     def init_async_client(self):
-        from ._redis import get_redis_async_client
+        from .._redis import get_redis_async_client

         return get_redis_async_client(
             connection_pool=self.async_redis_conn_pool, **self.redis_kwargs

@@ -302,7 +302,7 @@ class RedisCache(BaseCache):
         except Exception as e:
             # NON blocking - notify users Redis is throwing an exception
             print_verbose(
-                f"LiteLLM Caching: set() - Got exception from REDIS : {str(e)}"
+                f"litellm.caching.caching: set() - Got exception from REDIS : {str(e)}"
             )

     def increment_cache(

@@ -705,7 +705,7 @@ class RedisCache(BaseCache):
         except Exception as e:
             # NON blocking - notify users Redis is throwing an exception
             verbose_logger.error(
-                "LiteLLM Caching: get() - Got exception from REDIS: ", e
+                "litellm.caching.caching: get() - Got exception from REDIS: ", e
             )

     def batch_get_cache(self, key_list) -> dict:

@@ -781,7 +781,7 @@ class RedisCache(BaseCache):
             )
             # NON blocking - notify users Redis is throwing an exception
             print_verbose(
-                f"LiteLLM Caching: async get() - Got exception from REDIS: {str(e)}"
+                f"litellm.caching.caching: async get() - Got exception from REDIS: {str(e)}"
             )

     async def async_batch_get_cache(self, key_list) -> dict:
litellm/caching/caching_handler.py (new file, 440 lines)
@@ -0,0 +1,440 @@
"""
This contains LLMCachingHandler

This exposes two methods:
- async_get_cache
- async_set_cache

This file is a wrapper around caching.py

In each method it will call the appropriate method from caching.py
"""

import asyncio
import datetime
import threading
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple

from pydantic import BaseModel

import litellm
from litellm._logging import print_verbose
from litellm.caching.caching import (
    Cache,
    QdrantSemanticCache,
    RedisCache,
    RedisSemanticCache,
    S3Cache,
)
from litellm.types.rerank import RerankResponse
from litellm.types.utils import (
    CallTypes,
    Embedding,
    EmbeddingResponse,
    ModelResponse,
    TextCompletionResponse,
    TranscriptionResponse,
)

if TYPE_CHECKING:
    from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
else:
    LiteLLMLoggingObj = Any

class CachingHandlerResponse(BaseModel):
    """
    This is the response object for the caching handler. We need to separate embedding cached responses and (completion / text_completion / transcription) cached responses

    For embeddings there can be a cache hit for some of the inputs in the list and a cache miss for others
    """

    cached_result: Optional[Any] = None
    final_embedding_cached_response: Optional[EmbeddingResponse] = None
    embedding_all_elements_cache_hit: bool = (
        False  # this is set to True when all elements in the list have a cache hit in the embedding cache, if true return the final_embedding_cached_response no need to make an API call
    )


class LLMCachingHandler:
    def __init__(self):
        pass

    async def _async_get_cache(
        self,
        model: str,
        original_function: Callable,
        logging_obj: LiteLLMLoggingObj,
        start_time: datetime.datetime,
        call_type: str,
        kwargs: Dict[str, Any],
        args: Optional[Tuple[Any, ...]] = None,
    ) -> CachingHandlerResponse:
        """
        Internal method to get from the cache.
        Handles different call types (embeddings, chat/completions, text_completion, transcription)
        and accordingly returns the cached response

        Args:
            model: str:
            original_function: Callable:
            logging_obj: LiteLLMLoggingObj:
            start_time: datetime.datetime:
            call_type: str:
            kwargs: Dict[str, Any]:
            args: Optional[Tuple[Any, ...]] = None:


        Returns:
            CachingHandlerResponse:
        Raises:
            None
        """
        from litellm.utils import (
            CustomStreamWrapper,
            convert_to_model_response_object,
            convert_to_streaming_response_async,
        )

        args = args or ()

        final_embedding_cached_response: Optional[EmbeddingResponse] = None
        cached_result: Optional[Any] = None
        if (
            (kwargs.get("caching", None) is None and litellm.cache is not None)
            or kwargs.get("caching", False) is True
        ) and (
            kwargs.get("cache", {}).get("no-cache", False) is not True
        ):  # allow users to control returning cached responses from the completion function
            # checking cache
            print_verbose("INSIDE CHECKING CACHE")
            if (
                litellm.cache is not None
                and litellm.cache.supported_call_types is not None
                and str(original_function.__name__)
                in litellm.cache.supported_call_types
            ):
                print_verbose("Checking Cache")
                if call_type == CallTypes.aembedding.value and isinstance(
                    kwargs["input"], list
                ):
                    tasks = []
                    for idx, i in enumerate(kwargs["input"]):
                        preset_cache_key = litellm.cache.get_cache_key(
                            *args, **{**kwargs, "input": i}
                        )
                        tasks.append(
                            litellm.cache.async_get_cache(cache_key=preset_cache_key)
                        )
                    cached_result = await asyncio.gather(*tasks)
                    ## check if cached result is None ##
                    if cached_result is not None and isinstance(cached_result, list):
                        # set cached_result to None if all elements are None
                        if all(result is None for result in cached_result):
                            cached_result = None
                elif isinstance(litellm.cache.cache, RedisSemanticCache) or isinstance(
                    litellm.cache.cache, RedisCache
                ):
                    preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
                    kwargs["preset_cache_key"] = (
                        preset_cache_key  # for streaming calls, we need to pass the preset_cache_key
                    )
                    cached_result = await litellm.cache.async_get_cache(*args, **kwargs)
                elif isinstance(litellm.cache.cache, QdrantSemanticCache):
                    preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
                    kwargs["preset_cache_key"] = (
                        preset_cache_key  # for streaming calls, we need to pass the preset_cache_key
                    )
                    cached_result = await litellm.cache.async_get_cache(*args, **kwargs)
                else:  # for s3 caching. [NOT RECOMMENDED IN PROD - this will slow down responses since boto3 is sync]
                    preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
                    kwargs["preset_cache_key"] = (
                        preset_cache_key  # for streaming calls, we need to pass the preset_cache_key
                    )
                    cached_result = litellm.cache.get_cache(*args, **kwargs)
            if cached_result is not None and not isinstance(cached_result, list):
                print_verbose("Cache Hit!")
                cache_hit = True
                end_time = datetime.datetime.now()
                (
                    model,
                    custom_llm_provider,
                    dynamic_api_key,
                    api_base,
                ) = litellm.get_llm_provider(
                    model=model,
                    custom_llm_provider=kwargs.get("custom_llm_provider", None),
                    api_base=kwargs.get("api_base", None),
                    api_key=kwargs.get("api_key", None),
                )
                print_verbose(
                    f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
                )
                logging_obj.update_environment_variables(
                    model=model,
                    user=kwargs.get("user", None),
                    optional_params={},
                    litellm_params={
                        "logger_fn": kwargs.get("logger_fn", None),
                        "acompletion": True,
                        "metadata": kwargs.get("metadata", {}),
                        "model_info": kwargs.get("model_info", {}),
                        "proxy_server_request": kwargs.get(
                            "proxy_server_request", None
                        ),
                        "preset_cache_key": kwargs.get("preset_cache_key", None),
                        "stream_response": kwargs.get("stream_response", {}),
                        "api_base": kwargs.get("api_base", ""),
                    },
                    input=kwargs.get("messages", ""),
                    api_key=kwargs.get("api_key", None),
                    original_response=str(cached_result),
                    additional_args=None,
                    stream=kwargs.get("stream", False),
                )
                call_type = original_function.__name__
                if call_type == CallTypes.acompletion.value and isinstance(
                    cached_result, dict
                ):
                    if kwargs.get("stream", False) is True:
                        cached_result = convert_to_streaming_response_async(
                            response_object=cached_result,
                        )
                        cached_result = CustomStreamWrapper(
                            completion_stream=cached_result,
                            model=model,
                            custom_llm_provider="cached_response",
                            logging_obj=logging_obj,
                        )
                    else:
                        cached_result = convert_to_model_response_object(
                            response_object=cached_result,
                            model_response_object=ModelResponse(),
                        )
                if call_type == CallTypes.atext_completion.value and isinstance(
                    cached_result, dict
                ):
                    if kwargs.get("stream", False) is True:
                        cached_result = convert_to_streaming_response_async(
                            response_object=cached_result,
                        )
                        cached_result = CustomStreamWrapper(
                            completion_stream=cached_result,
                            model=model,
                            custom_llm_provider="cached_response",
                            logging_obj=logging_obj,
                        )
                    else:
                        cached_result = TextCompletionResponse(**cached_result)
                elif call_type == CallTypes.aembedding.value and isinstance(
                    cached_result, dict
                ):
                    cached_result = convert_to_model_response_object(
                        response_object=cached_result,
                        model_response_object=EmbeddingResponse(),
                        response_type="embedding",
                    )
                elif call_type == CallTypes.arerank.value and isinstance(
                    cached_result, dict
                ):
                    cached_result = convert_to_model_response_object(
                        response_object=cached_result,
                        model_response_object=None,
                        response_type="rerank",
                    )
                elif call_type == CallTypes.atranscription.value and isinstance(
                    cached_result, dict
                ):
                    hidden_params = {
                        "model": "whisper-1",
                        "custom_llm_provider": custom_llm_provider,
                        "cache_hit": True,
                    }
                    cached_result = convert_to_model_response_object(
                        response_object=cached_result,
                        model_response_object=TranscriptionResponse(),
                        response_type="audio_transcription",
                        hidden_params=hidden_params,
                    )
                if kwargs.get("stream", False) is False:
                    # LOG SUCCESS
                    asyncio.create_task(
                        logging_obj.async_success_handler(
                            cached_result, start_time, end_time, cache_hit
                        )
                    )
                    threading.Thread(
                        target=logging_obj.success_handler,
                        args=(cached_result, start_time, end_time, cache_hit),
                    ).start()
                cache_key = kwargs.get("preset_cache_key", None)
                if (
                    isinstance(cached_result, BaseModel)
                    or isinstance(cached_result, CustomStreamWrapper)
                ) and hasattr(cached_result, "_hidden_params"):
                    cached_result._hidden_params["cache_key"] = cache_key  # type: ignore
                return CachingHandlerResponse(cached_result=cached_result)
            elif (
                call_type == CallTypes.aembedding.value
                and cached_result is not None
                and isinstance(cached_result, list)
                and litellm.cache is not None
                and not isinstance(
                    litellm.cache.cache, S3Cache
                )  # s3 doesn't support bulk writing. Exclude.
            ):
                remaining_list = []
                non_null_list = []
                for idx, cr in enumerate(cached_result):
                    if cr is None:
                        remaining_list.append(kwargs["input"][idx])
                    else:
                        non_null_list.append((idx, cr))
                original_kwargs_input = kwargs["input"]
                kwargs["input"] = remaining_list
                if len(non_null_list) > 0:
                    print_verbose(f"EMBEDDING CACHE HIT! - {len(non_null_list)}")
                    final_embedding_cached_response = EmbeddingResponse(
                        model=kwargs.get("model"),
                        data=[None] * len(original_kwargs_input),
                    )
                    final_embedding_cached_response._hidden_params["cache_hit"] = (
                        True
                    )

                    for val in non_null_list:
                        idx, cr = val  # (idx, cr) tuple
                        if cr is not None:
                            final_embedding_cached_response.data[idx] = Embedding(
                                embedding=cr["embedding"],
                                index=idx,
                                object="embedding",
                            )
                if len(remaining_list) == 0:
                    # LOG SUCCESS
                    cache_hit = True
                    end_time = datetime.datetime.now()
                    (
                        model,
                        custom_llm_provider,
                        dynamic_api_key,
                        api_base,
                    ) = litellm.get_llm_provider(
                        model=model,
                        custom_llm_provider=kwargs.get("custom_llm_provider", None),
                        api_base=kwargs.get("api_base", None),
                        api_key=kwargs.get("api_key", None),
                    )
                    print_verbose(
                        f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
                    )
                    logging_obj.update_environment_variables(
                        model=model,
                        user=kwargs.get("user", None),
                        optional_params={},
                        litellm_params={
                            "logger_fn": kwargs.get("logger_fn", None),
                            "acompletion": True,
                            "metadata": kwargs.get("metadata", {}),
                            "model_info": kwargs.get("model_info", {}),
                            "proxy_server_request": kwargs.get(
                                "proxy_server_request", None
                            ),
                            "preset_cache_key": kwargs.get(
                                "preset_cache_key", None
                            ),
                            "stream_response": kwargs.get("stream_response", {}),
                            "api_base": "",
                        },
                        input=kwargs.get("messages", ""),
                        api_key=kwargs.get("api_key", None),
                        original_response=str(final_embedding_cached_response),
                        additional_args=None,
                        stream=kwargs.get("stream", False),
                    )
                    asyncio.create_task(
                        logging_obj.async_success_handler(
                            final_embedding_cached_response,
                            start_time,
                            end_time,
                            cache_hit,
                        )
                    )
                    threading.Thread(
                        target=logging_obj.success_handler,
                        args=(
                            final_embedding_cached_response,
                            start_time,
                            end_time,
                            cache_hit,
                        ),
                    ).start()
                    return CachingHandlerResponse(
                        final_embedding_cached_response=final_embedding_cached_response,
                        embedding_all_elements_cache_hit=True,
                    )
        return CachingHandlerResponse(
            cached_result=cached_result,
            final_embedding_cached_response=final_embedding_cached_response,
        )

    async def _async_set_cache(
        self,
        result: Any,
        original_function: Callable,
        kwargs: Dict[str, Any],
        args: Optional[Tuple[Any, ...]] = None,
    ):
        """
        Internal method to check the type of the result & cache used and adds the result to the cache accordingly

        Args:
            result: Any:
            original_function: Callable:
            kwargs: Dict[str, Any]:
            args: Optional[Tuple[Any, ...]] = None:

        Returns:
            None
        Raises:
            None
        """
        args = args or ()
        # [OPTIONAL] ADD TO CACHE
        if (
            (litellm.cache is not None)
            and litellm.cache.supported_call_types is not None
            and (str(original_function.__name__) in litellm.cache.supported_call_types)
            and (kwargs.get("cache", {}).get("no-store", False) is not True)
        ):
            if (
                isinstance(result, litellm.ModelResponse)
                or isinstance(result, litellm.EmbeddingResponse)
                or isinstance(result, TranscriptionResponse)
                or isinstance(result, RerankResponse)
            ):
                if (
                    isinstance(result, EmbeddingResponse)
                    and isinstance(kwargs["input"], list)
                    and litellm.cache is not None
                    and not isinstance(
                        litellm.cache.cache, S3Cache
                    )  # s3 doesn't support bulk writing. Exclude.
                ):
                    asyncio.create_task(
                        litellm.cache.async_add_cache_pipeline(result, *args, **kwargs)
                    )
                elif isinstance(litellm.cache.cache, S3Cache):
                    threading.Thread(
                        target=litellm.cache.add_cache,
                        args=(result,) + args,
                        kwargs=kwargs,
                    ).start()
                else:
                    asyncio.create_task(
                        litellm.cache.async_add_cache(result.json(), *args, **kwargs)
                    )
            else:
                asyncio.create_task(
                    litellm.cache.async_add_cache(result, *args, **kwargs)
                )
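An illustrative sketch (not part of the commit) of how the async client wrapper is expected to drive the new handler, inferred from the method signatures above and the litellm/utils.py changes further down. The helper function name here is hypothetical; the real call site lives inside the wrapper in litellm/utils.py and supplies its own locals and logging object.

```python
# Hypothetical driver - shows the call pattern only, not the committed wrapper code.
import datetime
from typing import Any, Callable, Dict, Optional, Tuple

from litellm.caching.caching_handler import CachingHandlerResponse, LLMCachingHandler

_llm_caching_handler = LLMCachingHandler()


async def check_cache_first(
    model: str,
    original_function: Callable,
    logging_obj: Any,
    call_type: str,
    kwargs: Dict[str, Any],
    args: Optional[Tuple[Any, ...]] = None,
) -> Optional[Any]:
    start_time = datetime.datetime.now()
    handler_response: CachingHandlerResponse = (
        await _llm_caching_handler._async_get_cache(
            model=model,
            original_function=original_function,
            logging_obj=logging_obj,
            start_time=start_time,
            call_type=call_type,
            kwargs=kwargs,
            args=args,
        )
    )
    # Non-embedding call types: a non-None cached_result is a full cache hit.
    if (
        handler_response.cached_result is not None
        and handler_response.final_embedding_cached_response is None
    ):
        return handler_response.cached_result
    # Embedding batches: every input was found in the cache, so no API call is needed.
    if handler_response.embedding_all_elements_cache_hit:
        return handler_response.final_embedding_cached_response
    # Cache miss (or partial embedding hit): the caller proceeds with the provider call
    # and later stores the result via _async_set_cache(result, original_function, kwargs, args).
    return None
```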
@@ -43,7 +43,7 @@
 # ### REDIS
 # # if len(os.getenv("REDIS_HOST", "")) > 0 and len(os.getenv("REDIS_PORT", "")) > 0 and len(os.getenv("REDIS_PASSWORD", "")) > 0:
 # # print(f"redis host: {os.getenv('REDIS_HOST')}; redis port: {os.getenv('REDIS_PORT')}; password: {os.getenv('REDIS_PASSWORD')}")
-# # from litellm.caching import Cache
+# # from litellm.caching.caching import Cache
 # # litellm.cache = Cache(type="redis", host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"), password=os.getenv("REDIS_PASSWORD"))
 # # print("\033[92mLiteLLM: Switched on Redis caching\033[0m")

@@ -21,7 +21,7 @@ import litellm.litellm_core_utils
 import litellm.litellm_core_utils.litellm_logging
 import litellm.types
 from litellm._logging import verbose_logger, verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_batch_logger import CustomBatchLogger
 from litellm.litellm_core_utils.exception_mapping_utils import (
     _add_key_name_and_team_to_alert,

@@ -13,7 +13,7 @@ import requests

 import litellm
 from litellm._logging import verbose_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.types.utils import StandardLoggingPayload

@@ -29,14 +29,30 @@ def create_client():
     clickhouse_host = os.getenv("CLICKHOUSE_HOST")
     if clickhouse_host is not None:
         verbose_logger.debug("setting up clickhouse")

         port = os.getenv("CLICKHOUSE_PORT")
         if port is not None and isinstance(port, str):
             port = int(port)

+        host: Optional[str] = os.getenv("CLICKHOUSE_HOST")
+        if host is None:
+            raise ValueError("CLICKHOUSE_HOST is not set")
+
+        username: Optional[str] = os.getenv("CLICKHOUSE_USERNAME")
+        if username is None:
+            raise ValueError("CLICKHOUSE_USERNAME is not set")
+
+        password: Optional[str] = os.getenv("CLICKHOUSE_PASSWORD")
+        if password is None:
+            raise ValueError("CLICKHOUSE_PASSWORD is not set")
+        if port is None:
+            raise ValueError("CLICKHOUSE_PORT is not set")
+
         client = clickhouse_connect.get_client(
-            host=os.getenv("CLICKHOUSE_HOST"),
+            host=host,
             port=port,
-            username=os.getenv("CLICKHOUSE_USERNAME"),
-            password=os.getenv("CLICKHOUSE_PASSWORD"),
+            username=username,
+            password=password,
         )
         return client
     else:

@@ -176,11 +192,29 @@ def _start_clickhouse():
     if port is not None and isinstance(port, str):
         port = int(port)

+    port = os.getenv("CLICKHOUSE_PORT")
+    if port is not None and isinstance(port, str):
+        port = int(port)
+
+    host: Optional[str] = os.getenv("CLICKHOUSE_HOST")
+    if host is None:
+        raise ValueError("CLICKHOUSE_HOST is not set")
+
+    username: Optional[str] = os.getenv("CLICKHOUSE_USERNAME")
+    if username is None:
+        raise ValueError("CLICKHOUSE_USERNAME is not set")
+
+    password: Optional[str] = os.getenv("CLICKHOUSE_PASSWORD")
+    if password is None:
+        raise ValueError("CLICKHOUSE_PASSWORD is not set")
+    if port is None:
+        raise ValueError("CLICKHOUSE_PORT is not set")
+
     client = clickhouse_connect.get_client(
-        host=os.getenv("CLICKHOUSE_HOST"),
+        host=host,
         port=port,
-        username=os.getenv("CLICKHOUSE_USERNAME"),
-        password=os.getenv("CLICKHOUSE_PASSWORD"),
+        username=username,
+        password=password,
     )
     # view all tables in DB
     response = client.query("SHOW TABLES")

@@ -241,11 +275,25 @@ class ClickhouseLogger:
         if port is not None and isinstance(port, str):
             port = int(port)

+        host: Optional[str] = os.getenv("CLICKHOUSE_HOST")
+        if host is None:
+            raise ValueError("CLICKHOUSE_HOST is not set")
+
+        username: Optional[str] = os.getenv("CLICKHOUSE_USERNAME")
+        if username is None:
+            raise ValueError("CLICKHOUSE_USERNAME is not set")
+
+        password: Optional[str] = os.getenv("CLICKHOUSE_PASSWORD")
+        if password is None:
+            raise ValueError("CLICKHOUSE_PASSWORD is not set")
+        if port is None:
+            raise ValueError("CLICKHOUSE_PORT is not set")
+
         client = clickhouse_connect.get_client(
-            host=os.getenv("CLICKHOUSE_HOST"),
+            host=host,
             port=port,
-            username=os.getenv("CLICKHOUSE_USERNAME"),
-            password=os.getenv("CLICKHOUSE_PASSWORD"),
+            username=username,
+            password=password,
         )
         self.client = client

@@ -8,7 +8,7 @@ from typing import Any, Literal, Optional, Tuple, Union
 import dotenv
 from pydantic import BaseModel

-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.types.llms.openai import ChatCompletionRequest
 from litellm.types.services import ServiceLoggerPayload

@@ -23,7 +23,7 @@ from litellm import (
     turn_off_message_logging,
     verbose_logger,
 )
-from litellm.caching import DualCache, InMemoryCache, S3Cache
+from litellm.caching.caching import DualCache, InMemoryCache, S3Cache
 from litellm.cost_calculator import _select_model_name_for_cost_calc
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.integrations.custom_logger import CustomLogger

@@ -10,7 +10,7 @@ from openai import AsyncAzureOpenAI, AzureOpenAI
 from typing_extensions import overload

 import litellm
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.types.utils import EmbeddingResponse

@@ -6,7 +6,7 @@ from typing import Dict, List, Optional, Tuple
 import httpx

 from litellm._logging import verbose_logger
-from litellm.caching import DualCache, InMemoryCache
+from litellm.caching.caching import DualCache, InMemoryCache
 from litellm.secret_managers.main import get_secret

 from .base import BaseLLM

@@ -29,7 +29,7 @@ import requests  # type: ignore

 import litellm
 from litellm import verbose_logger
-from litellm.caching import InMemoryCache
+from litellm.caching.caching import InMemoryCache
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.litellm_core_utils.litellm_logging import Logging
 from litellm.llms.custom_httpx.http_handler import (

@@ -8,7 +8,7 @@ from httpx import Response

 import litellm
 from litellm import verbose_logger
-from litellm.caching import InMemoryCache
+from litellm.caching.caching import InMemoryCache
 from litellm.llms.custom_httpx.http_handler import (
     _get_httpx_client,
     get_async_httpx_client,

@@ -4,7 +4,7 @@ from typing import Callable, List, Literal, Optional, Tuple, Union
 import httpx

 import litellm
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 from litellm.litellm_core_utils.litellm_logging import Logging
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.llms.OpenAI.openai import AllMessageValues

@@ -65,7 +65,7 @@ from litellm.utils import (
 )

 from ._logging import verbose_logger
-from .caching import disable_cache, enable_cache, update_cache
+from .caching.caching import disable_cache, enable_cache, update_cache
 from .llms import (
     aleph_alpha,
     baseten,
@@ -16,7 +16,7 @@ from pydantic import BaseModel

 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import (
     LiteLLM_EndUserTable,
     LiteLLM_JWTAuth,

@@ -15,7 +15,7 @@ from cryptography.hazmat.backends import default_backend
 from cryptography.hazmat.primitives import serialization

 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
 from litellm.proxy._types import LiteLLM_JWTAuth, LiteLLM_UserTable
 from litellm.proxy.utils import PrismaClient

@@ -5,7 +5,7 @@ from fastapi import APIRouter, Depends, HTTPException, Request

 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import RedisCache
+from litellm.caching.caching import RedisCache
 from litellm.proxy.auth.user_api_key_auth import user_api_key_auth

 router = APIRouter(

@@ -2,7 +2,7 @@ from typing import Any, Dict, List, Literal, Optional, Union

 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata

@@ -25,7 +25,7 @@ from fastapi import HTTPException

 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.litellm_core_utils.logging_utils import (
     convert_litellm_response_object_to_str,

@@ -25,7 +25,7 @@ from fastapi import HTTPException

 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.litellm_core_utils.logging_utils import (
     convert_litellm_response_object_to_str,

@@ -2,7 +2,7 @@ from typing import Any, Dict, List, Literal, Optional, Union

 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata

@@ -21,7 +21,7 @@ from pydantic import BaseModel
 import litellm  # noqa: E401
 from litellm import get_secret
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.utils import (

@@ -465,7 +465,7 @@ async def health_readiness():
     # check Cache
     cache_type = None
     if litellm.cache is not None:
-        from litellm.caching import RedisSemanticCache
+        from litellm.caching.caching import RedisSemanticCache

         cache_type = litellm.cache.type

@@ -7,7 +7,7 @@ from fastapi import HTTPException

 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth

@@ -11,7 +11,7 @@ from fastapi import HTTPException

 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache, InMemoryCache, RedisCache
+from litellm.caching.caching import DualCache, InMemoryCache, RedisCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth

@@ -7,7 +7,7 @@ from fastapi import HTTPException

 import litellm
 from litellm import verbose_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth

@@ -14,7 +14,7 @@ from fastapi import HTTPException
 import litellm
 from litellm import ModelResponse, Router
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.types.router import ModelGroupInfo

@@ -4,7 +4,7 @@ from fastapi import HTTPException

 import litellm
 from litellm import verbose_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth

@@ -9,7 +9,7 @@ from pydantic import BaseModel
 import litellm
 from litellm import ModelResponse
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
 from litellm.proxy._types import CurrentItemRateLimit, UserAPIKeyAuth

@@ -19,7 +19,7 @@ from fastapi import HTTPException

 import litellm  # noqa: E401
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.utils import (

@@ -18,7 +18,7 @@ from typing_extensions import overload

 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.llms.prompt_templates.factory import prompt_injection_detection_default_pt
 from litellm.proxy._types import LiteLLMPromptInjectionParams, UserAPIKeyAuth
@@ -112,7 +112,7 @@ from litellm import (
     RetrieveBatchRequest,
 )
 from litellm._logging import verbose_proxy_logger, verbose_router_logger
-from litellm.caching import DualCache, RedisCache
+from litellm.caching.caching import DualCache, RedisCache
 from litellm.exceptions import RejectedRequestError
 from litellm.integrations.SlackAlerting.slack_alerting import SlackAlerting
 from litellm.litellm_core_utils.core_helpers import (

@@ -1554,7 +1554,7 @@ class ProxyConfig:
         for key, value in litellm_settings.items():
             if key == "cache" and value is True:
                 print(f"{blue_color_code}\nSetting Cache on Proxy")  # noqa
-                from litellm.caching import Cache
+                from litellm.caching.caching import Cache

                 cache_params = {}
                 if "cache_params" in litellm_settings:

@@ -49,7 +49,7 @@ from litellm import (
 )
 from litellm._logging import verbose_proxy_logger
 from litellm._service_logger import ServiceLogging, ServiceTypes
-from litellm.caching import DualCache, RedisCache
+from litellm.caching.caching import DualCache, RedisCache
 from litellm.exceptions import RejectedRequestError
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.integrations.custom_logger import CustomLogger

@@ -37,7 +37,7 @@ import litellm.litellm_core_utils.exception_mapping_utils
 from litellm import get_secret_str
 from litellm._logging import verbose_router_logger
 from litellm.assistants.main import AssistantDeleted
-from litellm.caching import DualCache, InMemoryCache, RedisCache
+from litellm.caching.caching import DualCache, InMemoryCache, RedisCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
 from litellm.llms.AzureOpenAI.azure import get_azure_ad_token_from_oidc
@@ -14,7 +14,7 @@ from typing import Optional
 import dotenv  # type: ignore
 import requests

-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger


@@ -9,7 +9,7 @@ from pydantic import BaseModel
 import litellm
 from litellm import ModelResponse, token_counter, verbose_logger
 from litellm._logging import verbose_router_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger


@@ -9,7 +9,7 @@ from pydantic import BaseModel

 import litellm
 from litellm import ModelResponse, token_counter, verbose_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger


@@ -12,7 +12,7 @@ from pydantic import BaseModel

 from litellm import token_counter
 from litellm._logging import verbose_router_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.utils import print_verbose

@@ -10,7 +10,7 @@ from pydantic import BaseModel
 import litellm
 from litellm import token_counter
 from litellm._logging import verbose_logger, verbose_router_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.types.router import RouterErrors
 from litellm.utils import get_utc_datetime, print_verbose

@@ -7,7 +7,7 @@ import time
 from typing import List, Optional, Tuple, TypedDict

 from litellm import verbose_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache


 class CooldownCacheValue(TypedDict):
@@ -1,9 +1,11 @@
-import heapq
-from pydantic import BaseModel
-from typing import Optional
 import enum
-from litellm.caching import DualCache, RedisCache
+import heapq
+from typing import Optional
+
+from pydantic import BaseModel
+
 from litellm import print_verbose
+from litellm.caching.caching import DualCache, RedisCache


 class SchedulerCacheKeys(enum.Enum):
@@ -4,7 +4,7 @@ from typing import Optional

 import litellm
 from litellm._logging import verbose_logger
-from litellm.caching import InMemoryCache
+from litellm.caching.caching import InMemoryCache
 from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase
 from litellm.llms.custom_httpx.http_handler import _get_httpx_client
 from litellm.proxy._types import CommonProxyErrors, KeyManagementSystem

@@ -12,7 +12,7 @@ from dotenv import load_dotenv

 import litellm
 from litellm._logging import print_verbose, verbose_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.proxy._types import KeyManagementSystem

litellm/utils.py (383 changed lines)
@@ -56,7 +56,10 @@ import litellm._service_logger  # for storing API inputs, outputs, and metadata
 import litellm.litellm_core_utils
 import litellm.litellm_core_utils.audio_utils.utils
 import litellm.litellm_core_utils.json_validation_rule
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
+from litellm.caching.caching_handler import CachingHandlerResponse, LLMCachingHandler
+
+_llm_caching_handler = LLMCachingHandler()
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.litellm_core_utils.exception_mapping_utils import (

@@ -146,7 +149,13 @@ from typing import (
 from openai import OpenAIError as OriginalError

 from ._logging import verbose_logger
-from .caching import Cache, QdrantSemanticCache, RedisCache, RedisSemanticCache, S3Cache
+from .caching.caching import (
+    Cache,
+    QdrantSemanticCache,
+    RedisCache,
+    RedisSemanticCache,
+    S3Cache,
+)
 from .exceptions import (
     APIConnectionError,
     APIError,
@@ -1121,299 +1130,26 @@ def client(original_function):
print_verbose(
f"ASYNC kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache'): {kwargs.get('cache', None)}"
)
# if caching is false, don't run this
final_embedding_cached_response = None

if (
(kwargs.get("caching", None) is None and litellm.cache is not None)
or kwargs.get("caching", False) is True
) and (
kwargs.get("cache", {}).get("no-cache", False) is not True
): # allow users to control returning cached responses from the completion function
# checking cache
print_verbose("INSIDE CHECKING CACHE")
if (
litellm.cache is not None
and litellm.cache.supported_call_types is not None
and str(original_function.__name__)
in litellm.cache.supported_call_types
):
print_verbose("Checking Cache")
if call_type == CallTypes.aembedding.value and isinstance(
kwargs["input"], list
):
tasks = []
for idx, i in enumerate(kwargs["input"]):
preset_cache_key = litellm.cache.get_cache_key(
*args, **{**kwargs, "input": i}
)
tasks.append(
litellm.cache.async_get_cache(
cache_key=preset_cache_key
)
)
cached_result = await asyncio.gather(*tasks)
## check if cached result is None ##
if cached_result is not None and isinstance(
cached_result, list
):
if len(cached_result) == 1 and cached_result[0] is None:
cached_result = None
elif isinstance(
litellm.cache.cache, RedisSemanticCache
) or isinstance(litellm.cache.cache, RedisCache):
preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
kwargs["preset_cache_key"] = (
preset_cache_key # for streaming calls, we need to pass the preset_cache_key
)
cached_result = await litellm.cache.async_get_cache(
*args, **kwargs
)
elif isinstance(litellm.cache.cache, QdrantSemanticCache):
preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
kwargs["preset_cache_key"] = (
preset_cache_key # for streaming calls, we need to pass the preset_cache_key
)
cached_result = await litellm.cache.async_get_cache(
*args, **kwargs
)
else: # for s3 caching. [NOT RECOMMENDED IN PROD - this will slow down responses since boto3 is sync]
preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
kwargs["preset_cache_key"] = (
preset_cache_key # for streaming calls, we need to pass the preset_cache_key
)
cached_result = litellm.cache.get_cache(*args, **kwargs)
if cached_result is not None and not isinstance(
cached_result, list
):
print_verbose("Cache Hit!", log_level="INFO")
cache_hit = True
end_time = datetime.datetime.now()
(
model,
custom_llm_provider,
dynamic_api_key,
api_base,
) = litellm.get_llm_provider(
_caching_handler_response: CachingHandlerResponse = (
await _llm_caching_handler._async_get_cache(
model=model,
custom_llm_provider=kwargs.get("custom_llm_provider", None),
api_base=kwargs.get("api_base", None),
api_key=kwargs.get("api_key", None),
)
print_verbose(
f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
)
logging_obj.update_environment_variables(
model=model,
user=kwargs.get("user", None),
optional_params={},
litellm_params={
"logger_fn": kwargs.get("logger_fn", None),
"acompletion": True,
"metadata": kwargs.get("metadata", {}),
"model_info": kwargs.get("model_info", {}),
"proxy_server_request": kwargs.get(
"proxy_server_request", None
),
"preset_cache_key": kwargs.get(
"preset_cache_key", None
),
"stream_response": kwargs.get("stream_response", {}),
"api_base": kwargs.get("api_base", ""),
},
input=kwargs.get("messages", ""),
api_key=kwargs.get("api_key", None),
original_response=str(cached_result),
additional_args=None,
stream=kwargs.get("stream", False),
)
call_type = original_function.__name__
if call_type == CallTypes.acompletion.value and isinstance(
cached_result, dict
):
if kwargs.get("stream", False) is True:
cached_result = convert_to_streaming_response_async(
response_object=cached_result,
)
cached_result = CustomStreamWrapper(
completion_stream=cached_result,
model=model,
custom_llm_provider="cached_response",
original_function=original_function,
logging_obj=logging_obj,
start_time=start_time,
call_type=call_type,
kwargs=kwargs,
args=args,
)
else:
cached_result = convert_to_model_response_object(
response_object=cached_result,
model_response_object=ModelResponse(),
)
if (
call_type == CallTypes.atext_completion.value
and isinstance(cached_result, dict)
_caching_handler_response.cached_result is not None
and _caching_handler_response.final_embedding_cached_response is None
):
if kwargs.get("stream", False) is True:
cached_result = convert_to_streaming_response_async(
response_object=cached_result,
)
cached_result = CustomStreamWrapper(
completion_stream=cached_result,
model=model,
custom_llm_provider="cached_response",
logging_obj=logging_obj,
)
else:
cached_result = TextCompletionResponse(**cached_result)
elif call_type == CallTypes.aembedding.value and isinstance(
cached_result, dict
):
cached_result = convert_to_model_response_object(
response_object=cached_result,
model_response_object=EmbeddingResponse(),
response_type="embedding",
)
elif call_type == CallTypes.arerank.value and isinstance(
cached_result, dict
):
cached_result = convert_to_model_response_object(
response_object=cached_result,
model_response_object=None,
response_type="rerank",
)
elif call_type == CallTypes.atranscription.value and isinstance(
cached_result, dict
):
hidden_params = {
"model": "whisper-1",
"custom_llm_provider": custom_llm_provider,
"cache_hit": True,
}
cached_result = convert_to_model_response_object(
response_object=cached_result,
model_response_object=TranscriptionResponse(),
response_type="audio_transcription",
hidden_params=hidden_params,
)
if kwargs.get("stream", False) is False:
# LOG SUCCESS
asyncio.create_task(
logging_obj.async_success_handler(
cached_result, start_time, end_time, cache_hit
|
||||
)
|
||||
)
|
||||
threading.Thread(
|
||||
target=logging_obj.success_handler,
|
||||
args=(cached_result, start_time, end_time, cache_hit),
|
||||
).start()
|
||||
cache_key = kwargs.get("preset_cache_key", None)
|
||||
if (
|
||||
isinstance(cached_result, BaseModel)
|
||||
or isinstance(cached_result, CustomStreamWrapper)
|
||||
) and hasattr(cached_result, "_hidden_params"):
|
||||
cached_result._hidden_params["cache_key"] = cache_key # type: ignore
|
||||
return cached_result
|
||||
elif (
|
||||
call_type == CallTypes.aembedding.value
|
||||
and cached_result is not None
|
||||
and isinstance(cached_result, list)
|
||||
and litellm.cache is not None
|
||||
and not isinstance(
|
||||
litellm.cache.cache, S3Cache
|
||||
) # s3 doesn't support bulk writing. Exclude.
|
||||
):
|
||||
remaining_list = []
|
||||
non_null_list = []
|
||||
for idx, cr in enumerate(cached_result):
|
||||
if cr is None:
|
||||
remaining_list.append(kwargs["input"][idx])
|
||||
else:
|
||||
non_null_list.append((idx, cr))
|
||||
original_kwargs_input = kwargs["input"]
|
||||
kwargs["input"] = remaining_list
|
||||
if len(non_null_list) > 0:
|
||||
print_verbose(
|
||||
f"EMBEDDING CACHE HIT! - {len(non_null_list)}"
|
||||
)
|
||||
final_embedding_cached_response = EmbeddingResponse(
|
||||
model=kwargs.get("model"),
|
||||
data=[None] * len(original_kwargs_input),
|
||||
)
|
||||
final_embedding_cached_response._hidden_params[
|
||||
"cache_hit"
|
||||
] = True
|
||||
return _caching_handler_response.cached_result
|
||||
|
||||
elif _caching_handler_response.embedding_all_elements_cache_hit is True:
|
||||
return _caching_handler_response.final_embedding_cached_response
|
||||
|
||||
for val in non_null_list:
|
||||
idx, cr = val # (idx, cr) tuple
|
||||
if cr is not None:
|
||||
final_embedding_cached_response.data[idx] = (
|
||||
Embedding(
|
||||
embedding=cr["embedding"],
|
||||
index=idx,
|
||||
object="embedding",
|
||||
)
|
||||
)
|
||||
if len(remaining_list) == 0:
|
||||
# LOG SUCCESS
|
||||
cache_hit = True
|
||||
end_time = datetime.datetime.now()
|
||||
(
|
||||
model,
|
||||
custom_llm_provider,
|
||||
dynamic_api_key,
|
||||
api_base,
|
||||
) = litellm.get_llm_provider(
|
||||
model=model,
|
||||
custom_llm_provider=kwargs.get(
|
||||
"custom_llm_provider", None
|
||||
),
|
||||
api_base=kwargs.get("api_base", None),
|
||||
api_key=kwargs.get("api_key", None),
|
||||
)
|
||||
print_verbose(
|
||||
f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
|
||||
)
|
||||
logging_obj.update_environment_variables(
|
||||
model=model,
|
||||
user=kwargs.get("user", None),
|
||||
optional_params={},
|
||||
litellm_params={
|
||||
"logger_fn": kwargs.get("logger_fn", None),
|
||||
"acompletion": True,
|
||||
"metadata": kwargs.get("metadata", {}),
|
||||
"model_info": kwargs.get("model_info", {}),
|
||||
"proxy_server_request": kwargs.get(
|
||||
"proxy_server_request", None
|
||||
),
|
||||
"preset_cache_key": kwargs.get(
|
||||
"preset_cache_key", None
|
||||
),
|
||||
"stream_response": kwargs.get(
|
||||
"stream_response", {}
|
||||
),
|
||||
"api_base": "",
|
||||
},
|
||||
input=kwargs.get("messages", ""),
|
||||
api_key=kwargs.get("api_key", None),
|
||||
original_response=str(final_embedding_cached_response),
|
||||
additional_args=None,
|
||||
stream=kwargs.get("stream", False),
|
||||
)
|
||||
asyncio.create_task(
|
||||
logging_obj.async_success_handler(
|
||||
final_embedding_cached_response,
|
||||
start_time,
|
||||
end_time,
|
||||
cache_hit,
|
||||
)
|
||||
)
|
||||
threading.Thread(
|
||||
target=logging_obj.success_handler,
|
||||
args=(
|
||||
final_embedding_cached_response,
|
||||
start_time,
|
||||
end_time,
|
||||
cache_hit,
|
||||
),
|
||||
).start()
|
||||
return final_embedding_cached_response
|
||||
# MODEL CALL
|
||||
result = await original_function(*args, **kwargs)
|
||||
end_time = datetime.datetime.now()
|
||||
|
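Note: the hunk above removes the inline async cache-lookup logic from the wrapper and delegates it to the caching handler. A minimal sketch of the new read path, under these assumptions: `caching_handler` is the `LLMCachingHandler` instance the wrapper constructs elsewhere, and `_async_get_cache` accepts at least the keyword arguments visible in this hunk (the full signature is not shown here).

```python
# Sketch only - not the repo's exact wrapper body. Names mirror the hunk above;
# the helper function itself and its parameter list are illustrative.
async def get_cached_or_call(caching_handler, original_function, model, *args, **kwargs):
    handler_response = await caching_handler._async_get_cache(
        model=model,
        custom_llm_provider=kwargs.get("custom_llm_provider", None),
        api_base=kwargs.get("api_base", None),
        api_key=kwargs.get("api_key", None),
    )
    # Non-embedding cache hit: the handler returns the already-converted response.
    if (
        handler_response.cached_result is not None
        and handler_response.final_embedding_cached_response is None
    ):
        return handler_response.cached_result
    # Batch embedding request where every item was cached.
    if handler_response.embedding_all_elements_cache_hit is True:
        return handler_response.final_embedding_cached_response
    # Cache miss (or partial embedding hit): fall through to the real model call.
    return await original_function(*args, **kwargs)
```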
@ -1467,51 +1203,14 @@ def client(original_function):
original_response=result, model=model, optional_params=kwargs
)

# [OPTIONAL] ADD TO CACHE
if (
(litellm.cache is not None)
and litellm.cache.supported_call_types is not None
and (
str(original_function.__name__)
in litellm.cache.supported_call_types
)
and (kwargs.get("cache", {}).get("no-store", False) is not True)
):
if (
isinstance(result, litellm.ModelResponse)
or isinstance(result, litellm.EmbeddingResponse)
or isinstance(result, TranscriptionResponse)
or isinstance(result, RerankResponse)
):
if (
isinstance(result, EmbeddingResponse)
and isinstance(kwargs["input"], list)
and litellm.cache is not None
and not isinstance(
litellm.cache.cache, S3Cache
) # s3 doesn't support bulk writing. Exclude.
):
asyncio.create_task(
litellm.cache.async_add_cache_pipeline(
result, *args, **kwargs
)
)
elif isinstance(litellm.cache.cache, S3Cache):
threading.Thread(
target=litellm.cache.add_cache,
args=(result,) + args,
## Add response to cache
await _llm_caching_handler._async_set_cache(
result=result,
original_function=original_function,
kwargs=kwargs,
).start()
else:
asyncio.create_task(
litellm.cache.async_add_cache(
result.json(), *args, **kwargs
)
)
else:
asyncio.create_task(
litellm.cache.async_add_cache(result, *args, **kwargs)
args=args,
)

# LOG SUCCESS - handle streaming success logging in the _next_ object
print_verbose(
f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
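Note: the write path is consolidated the same way. Instead of branching on the cache backend (pipeline write, threaded S3 write, plain `async_add_cache`), the wrapper now makes a single call on the handler. A sketch, assuming `args` is forwarded alongside the keyword arguments shown in the hunk:

```python
async def store_result_in_cache(caching_handler, result, original_function, args, kwargs):
    # Backend-specific behaviour (Redis pipeline writes, the threaded S3 write the old
    # branches handled explicitly) is assumed to live inside the handler now.
    await caching_handler._async_set_cache(
        result=result,
        original_function=original_function,
        kwargs=kwargs,
        args=args,
    )
```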
@ -1528,24 +1227,32 @@ def client(original_function):
# REBUILD EMBEDDING CACHING
if (
isinstance(result, EmbeddingResponse)
and final_embedding_cached_response is not None
and final_embedding_cached_response.data is not None
and _caching_handler_response.final_embedding_cached_response
is not None
and _caching_handler_response.final_embedding_cached_response.data
is not None
):
idx = 0
final_data_list = []
for item in final_embedding_cached_response.data:
for (
item
) in _caching_handler_response.final_embedding_cached_response.data:
if item is None and result.data is not None:
final_data_list.append(result.data[idx])
idx += 1
else:
final_data_list.append(item)

final_embedding_cached_response.data = final_data_list
final_embedding_cached_response._hidden_params["cache_hit"] = True
final_embedding_cached_response._response_ms = (
_caching_handler_response.final_embedding_cached_response.data = (
final_data_list
)
_caching_handler_response.final_embedding_cached_response._hidden_params[
"cache_hit"
] = True
_caching_handler_response.final_embedding_cached_response._response_ms = (
end_time - start_time
).total_seconds() * 1000
return final_embedding_cached_response
return _caching_handler_response.final_embedding_cached_response

return result
except Exception as e:
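Note: for partial embedding cache hits, the hunk above merges freshly computed embeddings into the cached response, slot by slot. A standalone sketch of that merge, assuming the cached data holds one entry per original input with `None` marking the misses:

```python
def merge_embedding_data(cached_data, fresh_data):
    """Fill the None slots of a partially cached embedding response in request order."""
    merged, fresh_idx = [], 0
    for item in cached_data:
        if item is None and fresh_data is not None:
            merged.append(fresh_data[fresh_idx])  # take the newly computed embedding
            fresh_idx += 1
        else:
            merged.append(item)  # keep the cached embedding
    return merged

# e.g. merge_embedding_data(["cached_a", None, "cached_c"], ["fresh_b"])
#      -> ["cached_a", "fresh_b", "cached_c"]
```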
@ -19,7 +19,7 @@ from litellm._logging import verbose_proxy_logger
from litellm.proxy.utils import PrismaClient, ProxyLogging

verbose_proxy_logger.setLevel(level=logging.DEBUG)
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.router import (
Deployment,
updateDeployment,
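Note: the remaining hunks are mechanical import updates. The caching module appears to have moved into a package, so every `from litellm.caching import ...` becomes `from litellm.caching.caching import ...`; the class names themselves are unchanged. For example (in-memory cache shown; the constructor arguments are untouched by this refactor):

```python
import litellm

# Before: from litellm.caching import Cache, DualCache
from litellm.caching.caching import Cache, DualCache

litellm.cache = Cache()  # in-memory cache, as used in the updated tests below
```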
@ -28,7 +28,7 @@ import pytest
from openai import APIError

import litellm
from litellm.caching import DualCache, RedisCache
from litellm.caching.caching import DualCache, RedisCache
from litellm.integrations.SlackAlerting.slack_alerting import (
DeploymentMetrics,
SlackAlerting,

@ -13,7 +13,7 @@ sys.path.insert(
) # Adds the parent directory to the system path
import pytest, litellm
from litellm.proxy.auth.auth_checks import get_end_user_object
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import LiteLLM_EndUserTable, LiteLLM_BudgetTable
from litellm.proxy.utils import PrismaClient

@ -21,7 +21,7 @@ import pytest

import litellm
from litellm import Router, mock_completion
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.utils import ProxyLogging

@ -21,7 +21,7 @@ from litellm.proxy.enterprise.enterprise_hooks.banned_keywords import (
from litellm import Router, mock_completion
from litellm.proxy.utils import ProxyLogging, hash_token
from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching import DualCache
from litellm.caching.caching import DualCache


@pytest.mark.asyncio

@ -27,7 +27,7 @@ import pytest
import litellm
from litellm import Router, mock_completion
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.enterprise.enterprise_hooks.blocked_user_list import (
_ENTERPRISE_BlockedUserList,

@ -56,7 +56,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)

from starlette.datastructures import URL

from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import (
BlockUsers,
DynamoDBArgs,

@ -21,7 +21,7 @@ import pytest

import litellm
from litellm import aembedding, completion, embedding
from litellm.caching import Cache
from litellm.caching.caching import Cache

from unittest.mock import AsyncMock, patch, MagicMock
import datetime
@ -52,7 +52,7 @@ async def test_dual_cache_async_batch_get_cache():
- hit redis for the other -> expect to return None
- expect result = [in_memory_result, None]
"""
from litellm.caching import DualCache, InMemoryCache, RedisCache
from litellm.caching.caching import DualCache, InMemoryCache, RedisCache

in_memory_cache = InMemoryCache()
redis_cache = RedisCache()  # get credentials from environment
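Note: a hypothetical usage sketch for the two-tier cache exercised by the test above, with the new import path. Assumptions: `DualCache` accepts the layers as `in_memory_cache`/`redis_cache` keyword arguments and exposes the `async_batch_get_cache(keys=...)` method the test name refers to; neither signature is shown in this diff.

```python
import asyncio

from litellm.caching.caching import DualCache, InMemoryCache, RedisCache


async def main():
    dual_cache = DualCache(
        in_memory_cache=InMemoryCache(),
        redis_cache=RedisCache(),  # Redis credentials read from the environment
    )
    # Keys found in memory resolve locally; the rest fall through to Redis,
    # and misses come back as None - e.g. [in_memory_result, None].
    print(await dual_cache.async_batch_get_cache(keys=["key-a", "key-b"]))


asyncio.run(main())
```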
@ -74,7 +74,7 @@ def test_dual_cache_batch_get_cache():
- hit redis for the other -> expect to return None
- expect result = [in_memory_result, None]
"""
from litellm.caching import DualCache, InMemoryCache, RedisCache
from litellm.caching.caching import DualCache, InMemoryCache, RedisCache

in_memory_cache = InMemoryCache()
redis_cache = RedisCache()  # get credentials from environment

@ -520,6 +520,7 @@ async def test_embedding_caching_azure_individual_items_reordered():
assert embedding_val_1[0]["id"] == embedding_val_2[0]["id"]
```
"""
litellm.set_verbose = True
litellm.cache = Cache()
common_msg = f"{uuid.uuid4()}"
common_msg_2 = f"hey how's it going {uuid.uuid4()}"

@ -532,9 +533,11 @@ async def test_embedding_caching_azure_individual_items_reordered():
embedding_val_1 = await aembedding(
model="azure/azure-embedding-model", input=embedding_1, caching=True
)
print("embedding val 1", embedding_val_1)
embedding_val_2 = await aembedding(
model="azure/azure-embedding-model", input=embedding_2, caching=True
)
print("embedding val 2", embedding_val_2)
print(f"embedding_val_2._hidden_params: {embedding_val_2._hidden_params}")
assert embedding_val_2._hidden_params["cache_hit"] == True
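Note: cached responses advertise themselves through hidden params, which is what the assertion above checks. A small sketch of the same check outside the test, assuming Azure embedding credentials are configured; the `cache_key` field is populated by the wrapper for cached responses (see the first hunk).

```python
import asyncio

import litellm
from litellm.caching.caching import Cache

litellm.cache = Cache()  # in-memory cache


async def main():
    # Two identical calls: the second one should be answered from the cache.
    for _ in range(2):
        response = await litellm.aembedding(
            model="azure/azure-embedding-model",
            input=["hello world"],
            caching=True,
        )
    print(response._hidden_params.get("cache_hit"), response._hidden_params.get("cache_key"))


asyncio.run(main())
```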
@ -866,7 +869,7 @@ async def test_redis_cache_cluster_init_unit_test():
from redis.asyncio import RedisCluster as AsyncRedisCluster
from redis.cluster import RedisCluster

from litellm.caching import RedisCache
from litellm.caching.caching import RedisCache

litellm.set_verbose = True

@ -900,7 +903,7 @@ async def test_redis_cache_cluster_init_with_env_vars_unit_test():
from redis.asyncio import RedisCluster as AsyncRedisCluster
from redis.cluster import RedisCluster

from litellm.caching import RedisCache
from litellm.caching.caching import RedisCache

litellm.set_verbose = True

@ -1554,7 +1557,7 @@ def test_custom_redis_cache_params():


def test_get_cache_key():
from litellm.caching import Cache
from litellm.caching.caching import Cache

try:
print("Testing get_cache_key")

@ -1989,7 +1992,7 @@ async def test_cache_default_off_acompletion():

verbose_logger.setLevel(logging.DEBUG)

from litellm.caching import CacheMode
from litellm.caching.caching import CacheMode

random_number = random.randint(
1, 100000

@ -2072,7 +2075,7 @@ async def test_dual_cache_uses_redis():
- Assert that value from redis is used
"""
litellm.set_verbose = True
from litellm.caching import DualCache, RedisCache
from litellm.caching.caching import DualCache, RedisCache

current_usage = uuid.uuid4()

@ -2095,7 +2098,7 @@ async def test_proxy_logging_setup():
"""
Assert always_read_redis is True when used by internal usage cache
"""
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy.utils import ProxyLogging

pl_obj = ProxyLogging(user_api_key_cache=DualCache())

@ -2165,7 +2168,7 @@ async def test_redis_proxy_batch_redis_get_cache():
- make 2nd call -> expect hit
"""

from litellm.caching import Cache, DualCache
from litellm.caching.caching import Cache, DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.hooks.batch_redis_get import _PROXY_BatchRedisRequests

@ -15,7 +15,7 @@ sys.path.insert(
import pytest
import litellm
from litellm import embedding, completion, Router
from litellm.caching import Cache
from litellm.caching.caching import Cache

messages = [{"role": "user", "content": f"who is ishaan {time.time()}"}]

@ -151,7 +151,7 @@ async def test_datadog_log_redis_failures():
Test that poorly configured Redis is logged as Warning on DataDog
"""
try:
from litellm.caching import Cache
from litellm.caching.caching import Cache
from litellm.integrations.datadog.datadog import DataDogLogger

litellm.cache = Cache(
@ -24,7 +24,7 @@ import pytest
from fastapi import Request

import litellm
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import LiteLLM_JWTAuth, LiteLLM_UserTable, LiteLLMRoutes
from litellm.proxy.auth.handle_jwt import JWTHandler
from litellm.proxy.management_endpoints.team_endpoints import new_team

@ -89,7 +89,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)

from starlette.datastructures import URL

from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import (
DynamoDBArgs,
GenerateKeyRequest,

@ -1444,7 +1444,7 @@ def test_call_with_key_over_budget(prisma_client):

# update spend using track_cost callback, make 2nd request, it should fail
from litellm import Choices, Message, ModelResponse, Usage
from litellm.caching import Cache
from litellm.caching.caching import Cache
from litellm.proxy.proxy_server import (
_PROXY_track_cost_callback as track_cost_callback,
)

@ -1564,7 +1564,7 @@ def test_call_with_key_over_budget_no_cache(prisma_client):
setattr(litellm.proxy.proxy_server, "proxy_batch_write_at", 1)

from litellm import Choices, Message, ModelResponse, Usage
from litellm.caching import Cache
from litellm.caching.caching import Cache

litellm.cache = Cache()
import time

@ -1685,7 +1685,7 @@ def test_call_with_key_over_model_budget(prisma_client):

# update spend using track_cost callback, make 2nd request, it should fail
from litellm import Choices, Message, ModelResponse, Usage
from litellm.caching import Cache
from litellm.caching.caching import Cache
from litellm.proxy.proxy_server import (
_PROXY_track_cost_callback as track_cost_callback,
)

@ -25,7 +25,7 @@ import pytest

import litellm
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.guardrails.guardrail_hooks.lakera_ai import lakeraAI_Moderation
from litellm.proxy.proxy_server import embeddings

@ -20,7 +20,7 @@ import pytest

import litellm
from litellm import Router
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.router_strategy.least_busy import LeastBusyLoggingHandler

### UNIT TESTS FOR LEAST BUSY LOGGING ###

@ -20,7 +20,7 @@ from litellm.proxy.enterprise.enterprise_hooks.llm_guard import _ENTERPRISE_LLMG
from litellm import Router, mock_completion
from litellm.proxy.utils import ProxyLogging, hash_token
from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching import DualCache
from litellm.caching.caching import DualCache

### UNIT TESTS FOR LLM GUARD ###


@ -10,7 +10,7 @@
# import asyncio
# from litellm import Router, Timeout
# import time
# from litellm.caching import Cache
# from litellm.caching.caching import Cache
# import litellm

# litellm.cache = Cache(

@ -15,7 +15,7 @@ sys.path.insert(
import pytest
from litellm import Router
from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
from litellm.caching import DualCache
from litellm.caching.caching import DualCache

### UNIT TESTS FOR cost ROUTING ###
@ -22,7 +22,7 @@ import pytest

import litellm
from litellm import Router
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler

### UNIT TESTS FOR LATENCY ROUTING ###

@ -19,7 +19,7 @@
# from litellm import Router
# from litellm.proxy.utils import ProxyLogging, hash_token
# from litellm.proxy._types import UserAPIKeyAuth
# from litellm.caching import DualCache, RedisCache
# from litellm.caching.caching import DualCache, RedisCache
# from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter
# from datetime import datetime

@ -22,7 +22,7 @@ from litellm.proxy.enterprise.enterprise_hooks.openai_moderation import (
from litellm import Router, mock_completion
from litellm.proxy.utils import ProxyLogging, hash_token
from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching import DualCache
from litellm.caching.caching import DualCache

### UNIT TESTS FOR OpenAI Moderation ###

@ -23,7 +23,7 @@ import pytest

import litellm
from litellm import Router
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.hooks.parallel_request_limiter import (
_PROXY_MaxParallelRequestsHandler as MaxParallelRequestsHandler,

@ -22,7 +22,7 @@ import pytest

import litellm
from litellm import Router, mock_completion
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.hooks.presidio_pii_masking import _OPTIONAL_PresidioPIIMasking
from litellm.proxy.utils import ProxyLogging

@ -67,7 +67,7 @@ async def test_completion_with_caching_bad_call():
litellm.set_verbose = True

try:
from litellm.caching import RedisCache
from litellm.caching.caching import RedisCache

litellm.service_callback = ["prometheus_system"]
sl = ServiceLogging(mock_testing=True)

@ -20,7 +20,7 @@ from litellm.proxy.hooks.prompt_injection_detection import (
from litellm import Router, mock_completion
from litellm.proxy.utils import ProxyLogging
from litellm.proxy._types import UserAPIKeyAuth, LiteLLMPromptInjectionParams
from litellm.caching import DualCache
from litellm.caching.caching import DualCache


@pytest.mark.asyncio

@ -31,7 +31,7 @@ from starlette.datastructures import URL

import litellm
from litellm import Router, mock_completion
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.enterprise.enterprise_hooks.secret_detection import (

@ -745,7 +745,7 @@ async def test_team_update_redis():
"""
Tests if team update, updates the redis cache if set
"""
from litellm.caching import DualCache, RedisCache
from litellm.caching.caching import DualCache, RedisCache
from litellm.proxy._types import LiteLLM_TeamTableCachedObj
from litellm.proxy.auth.auth_checks import _cache_team_object

@ -775,7 +775,7 @@ async def test_get_team_redis(client_no_auth):
"""
Tests if get_team_object gets value from redis cache, if set
"""
from litellm.caching import DualCache, RedisCache
from litellm.caching.caching import DualCache, RedisCache
from litellm.proxy.auth.auth_checks import get_team_object

proxy_logging_obj: ProxyLogging = getattr(

@ -26,7 +26,7 @@ from starlette.datastructures import URL

import litellm
from litellm import Router, mock_completion
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.enterprise.enterprise_hooks.secret_detection import (
@ -3128,7 +3128,7 @@ async def test_azure_astreaming_and_function_calling():
"content": f"What is the weather like in Boston? {uuid.uuid4()}",
}
]
from litellm.caching import Cache
from litellm.caching.caching import Cache

litellm.cache = Cache(
type="redis",

@ -23,7 +23,7 @@ import pytest

import litellm
from litellm import Router
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.router_strategy.lowest_tpm_rpm_v2 import (
LowestTPMLoggingHandler_v2 as LowestTPMLoggingHandler,
)

@ -27,7 +27,7 @@ import pytest
import litellm
from litellm import Router, mock_completion
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.management_endpoints.internal_user_endpoints import (
new_user,

@ -53,7 +53,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)

from starlette.datastructures import URL

from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import (
BlockUsers,
DynamoDBArgs,

@ -157,7 +157,7 @@ async def test_transcription_on_router():
@pytest.mark.asyncio()
async def test_transcription_caching():
import litellm
from litellm.caching import Cache
from litellm.caching.caching import Cache

litellm.set_verbose = True
litellm.cache = Cache()

@ -71,7 +71,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)

from starlette.datastructures import URL

from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import (
DynamoDBArgs,
GenerateKeyRequest,

@ -78,7 +78,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)

from starlette.datastructures import URL

from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import *

proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())

@ -17,7 +17,7 @@ from litellm.proxy._types import LitellmUserRoles
import os
import jwt
import time
from litellm.caching import DualCache
from litellm.caching.caching import DualCache

proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())

@ -85,7 +85,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)

from starlette.datastructures import URL

from litellm.caching import DualCache
from litellm.caching.caching import DualCache
from litellm.proxy._types import (
DynamoDBArgs,
GenerateKeyRequest,