forked from phoenix/litellm-mirror

(refactor) caching use LLMCachingHandler for async_get_cache and set_cache (#6208)

* use folder for caching
* fix importing caching
* fix clickhouse pyright
* fix linting
* fix correctly pass kwargs and args
* fix test case for embedding
* fix linting
* fix embedding caching logic
* fix refactor handle utils.py
* fix test_embedding_caching_azure_individual_items_reordered

This commit is contained in: parent 20e50d7002, commit 4d1b4beb3d
96 changed files with 690 additions and 489 deletions
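Most of the hunks below are a mechanical import-path migration: the single module `litellm/caching.py` becomes the package `litellm/caching/`, so every `from litellm.caching import ...` turns into `from litellm.caching.caching import ...`. A minimal before/after sketch of what a caller has to change (the model name and prompt are illustrative; the import names come from the hunks below):

```python
# before this commit, caching objects lived in litellm/caching.py:
#   from litellm.caching import Cache
# after it, caching.py sits inside the litellm/caching/ package:
from litellm.caching.caching import Cache

import litellm
from litellm import completion

litellm.cache = Cache()  # in-memory cache, as in the docs hunks below

# illustrative call; a repeat call with the same arguments is served from the cache
response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
    caching=True,
)
```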
@@ -10,7 +10,7 @@ sys.path.insert(
 import asyncio
 from litellm import Router, Timeout
 import time
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 import litellm
 import openai
@@ -10,7 +10,7 @@ sys.path.insert(
 import asyncio
 from litellm import Router, Timeout
 import time
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 import litellm

 litellm.cache = Cache(
@@ -3,7 +3,7 @@ import TabItem from '@theme/TabItem';

 # Caching - In-Memory, Redis, s3, Redis Semantic Cache, Disk

-[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/caching.py)
+[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm.caching.caching.py)

 :::info
@@ -31,7 +31,7 @@ For the hosted version you can setup your own Redis DB here: https://app.redisla
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache

 litellm.cache = Cache(type="redis", host=<host>, port=<port>, password=<password>)
@@ -68,7 +68,7 @@ AWS_SECRET_ACCESS_KEY = "WOl*****"
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache

 # pass s3-bucket name
 litellm.cache = Cache(type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2")
@@ -101,7 +101,7 @@ For the hosted version you can setup your own Redis DB here: https://app.redisla
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache

 random_number = random.randint(
     1, 100000
@@ -155,7 +155,7 @@ To set up a Qdrant cluster locally follow: https://qdrant.tech/documentation/qui
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache

 random_number = random.randint(
     1, 100000
@@ -210,7 +210,7 @@ assert response1.id == response2.id
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache()

 # Make completion calls
@@ -246,7 +246,7 @@ Then you can use the disk cache as follows.
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache(type="disk")

 # Make completion calls
@@ -422,7 +422,7 @@ def custom_get_cache_key(*args, **kwargs):

 Set your function as litellm.cache.get_cache_key
 ```python
-from litellm.caching import Cache
+from litellm.caching.caching import Cache

 cache = Cache(type="redis", host=os.environ['REDIS_HOST'], port=os.environ['REDIS_PORT'], password=os.environ['REDIS_PASSWORD'])
@@ -434,7 +434,7 @@ litellm.cache = cache # set litellm.cache to your cache
 ## How to write custom add/get cache functions
 ### 1. Init Cache
 ```python
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 cache = Cache()
 ```
@@ -6,7 +6,7 @@ Use api.litellm.ai for caching `completion()` and `embedding()` responses
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache(type="hosted") # init cache to use api.litellm.ai

 # Make completion calls
@@ -31,7 +31,7 @@ response2 = completion(
 import time
 import litellm
 from litellm import completion, embedding
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache(type="hosted")

 start_time = time.time()
@@ -53,7 +53,7 @@ LiteLLM can cache your streamed responses for you
 import litellm
 import time
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache

 litellm.cache = Cache(type="hosted")
@@ -13,7 +13,7 @@ Keys in the cache are `model`, the following example will lead to a cache hit
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache()

 # Make completion calls
@@ -35,7 +35,7 @@ response2 = completion(
 Add custom key-value pairs to your cache.

 ```python
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 cache = Cache()

 cache.add_cache(cache_key="test-key", result="1234")
@@ -50,7 +50,7 @@ LiteLLM can cache your streamed responses for you
 ```python
 import litellm
 from litellm import completion
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache()

 # Make completion calls
@@ -77,7 +77,7 @@ Keys in the cache are `model`, the following example will lead to a cache hit
 import time
 import litellm
 from litellm import embedding
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 litellm.cache = Cache()

 start_time = time.time()
@@ -49,13 +49,13 @@ litellm_settings:
   cache: true
   cache_params: # set cache params for redis
     type: redis
-    namespace: "litellm_caching"
+    namespace: "litellm.caching.caching"
 ```

 and keys will be stored like:

 ```
-litellm_caching:<hash>
+litellm.caching.caching:<hash>
 ```

 #### Redis Cluster
@@ -645,7 +645,7 @@ litellm_settings:
     host: "localhost" # The host address for the Redis cache. Required if type is "redis".
     port: 6379 # The port number for the Redis cache. Required if type is "redis".
     password: "your_password" # The password for the Redis cache. Required if type is "redis".
-    namespace: "litellm_caching" # namespace for redis cache
+    namespace: "litellm.caching.caching" # namespace for redis cache

     # Optional - Redis Cluster Settings
     redis_startup_nodes: [{"host": "127.0.0.1", "port": "7001"}]
@@ -25,7 +25,7 @@ from typing import Any, Dict, List, Literal, Optional, Union

 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata
@@ -6,7 +6,7 @@ import dotenv, os
 import requests

 from litellm.proxy._types import UserAPIKeyAuth
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache

 from typing import Literal, Union, Optional
@@ -13,7 +13,7 @@ sys.path.insert(
 ) # Adds the parent directory to the system path
 from typing import Optional, Literal, Union, Any
 import litellm, traceback, sys, uuid
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from fastapi import HTTPException
@@ -9,7 +9,7 @@

 from typing import Optional, Literal
 import litellm
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from litellm._logging import verbose_proxy_logger
@@ -10,7 +10,7 @@
 from typing import Optional, Literal
 import litellm
 from litellm.proxy.utils import PrismaClient
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth, LiteLLM_EndUserTable
 from litellm.integrations.custom_logger import CustomLogger
 from litellm._logging import verbose_proxy_logger
@@ -9,7 +9,7 @@

 from typing import Optional, Literal, Union
 import litellm, traceback, sys, uuid
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from fastapi import HTTPException
@@ -15,7 +15,7 @@ sys.path.insert(
 ) # Adds the parent directory to the system path
 from typing import Optional, Literal, Union
 import litellm, traceback, sys, uuid
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from fastapi import HTTPException
@@ -13,7 +13,7 @@ import traceback
 import sys
 import uuid
 import os
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from fastapi import HTTPException
@@ -12,7 +12,7 @@ sys.path.insert(
 ) # Adds the parent directory to the system path
 from typing import Optional, Literal, Union
 import litellm, traceback, sys, uuid
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.integrations.custom_logger import CustomLogger
 from fastapi import HTTPException
@@ -12,7 +12,7 @@ sys.path.insert(
     0, os.path.abspath("../..")
 ) # Adds the parent directory to the system path
 from typing import Optional
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm._logging import verbose_proxy_logger
 import tempfile
@@ -7,7 +7,7 @@ import threading
 import os
 from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 from litellm._logging import (
     set_verbose,
     _turn_on_debug,
@@ -2,7 +2,6 @@ from concurrent.futures import FIRST_COMPLETED, ThreadPoolExecutor, wait
 from typing import List, Optional

 import litellm
-from litellm import completion
 from litellm._logging import print_verbose
 from litellm.utils import get_optional_params
@@ -108,7 +107,7 @@ def batch_completion(
             if "kwargs" in kwargs_modified:
                 original_kwargs = kwargs_modified.pop("kwargs")
             future = executor.submit(
-                completion, **kwargs_modified, **original_kwargs
+                litellm.completion, **kwargs_modified, **original_kwargs
             )
             completions.append(future)
@@ -156,7 +155,7 @@ def batch_completion_models(*args, **kwargs):
         with ThreadPoolExecutor(max_workers=len(models)) as executor:
             for model in models:
                 futures[model] = executor.submit(
-                    completion, *args, model=model, **kwargs
+                    litellm.completion, *args, model=model, **kwargs
                 )

             for model, future in sorted(
@@ -178,7 +177,9 @@ def batch_completion_models(*args, **kwargs):
                     ): # don't override deployment values e.g. model name, api base, etc.
                         deployment[key] = kwargs[key]
                 kwargs = {**deployment, **nested_kwargs}
-                futures[deployment["model"]] = executor.submit(completion, **kwargs)
+                futures[deployment["model"]] = executor.submit(
+                    litellm.completion, **kwargs
+                )

         while futures:
             # wait for the first returned future
@@ -246,7 +247,7 @@ def batch_completion_models_all_responses(*args, **kwargs):

     with concurrent.futures.ThreadPoolExecutor(max_workers=len(models)) as executor:
         for idx, model in enumerate(models):
-            future = executor.submit(completion, *args, model=model, **kwargs)
+            future = executor.submit(litellm.completion, *args, model=model, **kwargs)
             if future.result() is not None:
                 responses.append(future.result())
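The hunks above drop the module-level `from litellm import completion` and switch every thread-pool submission in the `batch_completion*` helpers to the fully qualified `litellm.completion`. A minimal sketch of the resulting call pattern (the two model names and the prompt are illustrative, not taken from the diff):

```python
import concurrent.futures

import litellm

models = ["gpt-3.5-turbo", "claude-3-haiku-20240307"]  # illustrative model names
kwargs = {"messages": [{"role": "user", "content": "hello"}]}

futures = {}
with concurrent.futures.ThreadPoolExecutor(max_workers=len(models)) as executor:
    for model in models:
        # fully qualified call, matching the refactored batch_completion_models
        futures[model] = executor.submit(litellm.completion, model=model, **kwargs)

# all futures have completed once the with-block exits
responses = {model: future.result() for model, future in futures.items()}
```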
@@ -212,7 +212,7 @@ class RedisCache(BaseCache):

         from litellm._service_logger import ServiceLogging

-        from ._redis import get_redis_client, get_redis_connection_pool
+        from .._redis import get_redis_client, get_redis_connection_pool

         redis_kwargs = {}
         if host is not None:
@@ -276,7 +276,7 @@ class RedisCache(BaseCache):
         )

     def init_async_client(self):
-        from ._redis import get_redis_async_client
+        from .._redis import get_redis_async_client

         return get_redis_async_client(
             connection_pool=self.async_redis_conn_pool, **self.redis_kwargs
@@ -302,7 +302,7 @@ class RedisCache(BaseCache):
         except Exception as e:
             # NON blocking - notify users Redis is throwing an exception
             print_verbose(
-                f"LiteLLM Caching: set() - Got exception from REDIS : {str(e)}"
+                f"litellm.caching.caching: set() - Got exception from REDIS : {str(e)}"
             )

     def increment_cache(
@@ -705,7 +705,7 @@ class RedisCache(BaseCache):
         except Exception as e:
             # NON blocking - notify users Redis is throwing an exception
             verbose_logger.error(
-                "LiteLLM Caching: get() - Got exception from REDIS: ", e
+                "litellm.caching.caching: get() - Got exception from REDIS: ", e
             )

     def batch_get_cache(self, key_list) -> dict:
@@ -781,7 +781,7 @@ class RedisCache(BaseCache):
             )
             # NON blocking - notify users Redis is throwing an exception
             print_verbose(
-                f"LiteLLM Caching: async get() - Got exception from REDIS: {str(e)}"
+                f"litellm.caching.caching: async get() - Got exception from REDIS: {str(e)}"
             )

     async def async_batch_get_cache(self, key_list) -> dict:
litellm/caching/caching_handler.py (new file, 440 lines)
@@ -0,0 +1,440 @@
"""
This contains LLMCachingHandler

This exposes two methods:
- async_get_cache
- async_set_cache

This file is a wrapper around caching.py

In each method it will call the appropriate method from caching.py
"""

import asyncio
import datetime
import threading
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple

from pydantic import BaseModel

import litellm
from litellm._logging import print_verbose
from litellm.caching.caching import (
    Cache,
    QdrantSemanticCache,
    RedisCache,
    RedisSemanticCache,
    S3Cache,
)
from litellm.types.rerank import RerankResponse
from litellm.types.utils import (
    CallTypes,
    Embedding,
    EmbeddingResponse,
    ModelResponse,
    TextCompletionResponse,
    TranscriptionResponse,
)

if TYPE_CHECKING:
    from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
else:
    LiteLLMLoggingObj = Any


class CachingHandlerResponse(BaseModel):
    """
    This is the response object for the caching handler. We need to separate embedding cached responses and (completion / text_completion / transcription) cached responses

    For embeddings there can be a cache hit for some of the inputs in the list and a cache miss for others
    """

    cached_result: Optional[Any] = None
    final_embedding_cached_response: Optional[EmbeddingResponse] = None
    embedding_all_elements_cache_hit: bool = (
        False  # this is set to True when all elements in the list have a cache hit in the embedding cache, if true return the final_embedding_cached_response no need to make an API call
    )


class LLMCachingHandler:
    def __init__(self):
        pass

    async def _async_get_cache(
        self,
        model: str,
        original_function: Callable,
        logging_obj: LiteLLMLoggingObj,
        start_time: datetime.datetime,
        call_type: str,
        kwargs: Dict[str, Any],
        args: Optional[Tuple[Any, ...]] = None,
    ) -> CachingHandlerResponse:
        """
        Internal method to get from the cache.
        Handles different call types (embeddings, chat/completions, text_completion, transcription)
        and accordingly returns the cached response

        Args:
            model: str:
            original_function: Callable:
            logging_obj: LiteLLMLoggingObj:
            start_time: datetime.datetime:
            call_type: str:
            kwargs: Dict[str, Any]:
            args: Optional[Tuple[Any, ...]] = None:

        Returns:
            CachingHandlerResponse:
        Raises:
            None
        """
        from litellm.utils import (
            CustomStreamWrapper,
            convert_to_model_response_object,
            convert_to_streaming_response_async,
        )

        args = args or ()

        final_embedding_cached_response: Optional[EmbeddingResponse] = None
        cached_result: Optional[Any] = None
        if (
            (kwargs.get("caching", None) is None and litellm.cache is not None)
            or kwargs.get("caching", False) is True
        ) and (
            kwargs.get("cache", {}).get("no-cache", False) is not True
        ): # allow users to control returning cached responses from the completion function
            # checking cache
            print_verbose("INSIDE CHECKING CACHE")
            if (
                litellm.cache is not None
                and litellm.cache.supported_call_types is not None
                and str(original_function.__name__)
                in litellm.cache.supported_call_types
            ):
                print_verbose("Checking Cache")
                if call_type == CallTypes.aembedding.value and isinstance(
                    kwargs["input"], list
                ):
                    tasks = []
                    for idx, i in enumerate(kwargs["input"]):
                        preset_cache_key = litellm.cache.get_cache_key(
                            *args, **{**kwargs, "input": i}
                        )
                        tasks.append(
                            litellm.cache.async_get_cache(cache_key=preset_cache_key)
                        )
                    cached_result = await asyncio.gather(*tasks)
                    ## check if cached result is None ##
                    if cached_result is not None and isinstance(cached_result, list):
                        # set cached_result to None if all elements are None
                        if all(result is None for result in cached_result):
                            cached_result = None
                elif isinstance(litellm.cache.cache, RedisSemanticCache) or isinstance(
                    litellm.cache.cache, RedisCache
                ):
                    preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
                    kwargs["preset_cache_key"] = (
                        preset_cache_key  # for streaming calls, we need to pass the preset_cache_key
                    )
                    cached_result = await litellm.cache.async_get_cache(*args, **kwargs)
                elif isinstance(litellm.cache.cache, QdrantSemanticCache):
                    preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
                    kwargs["preset_cache_key"] = (
                        preset_cache_key  # for streaming calls, we need to pass the preset_cache_key
                    )
                    cached_result = await litellm.cache.async_get_cache(*args, **kwargs)
                else:  # for s3 caching. [NOT RECOMMENDED IN PROD - this will slow down responses since boto3 is sync]
                    preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
                    kwargs["preset_cache_key"] = (
                        preset_cache_key  # for streaming calls, we need to pass the preset_cache_key
                    )
                    cached_result = litellm.cache.get_cache(*args, **kwargs)
            if cached_result is not None and not isinstance(cached_result, list):
                print_verbose("Cache Hit!")
                cache_hit = True
                end_time = datetime.datetime.now()
                (
                    model,
                    custom_llm_provider,
                    dynamic_api_key,
                    api_base,
                ) = litellm.get_llm_provider(
                    model=model,
                    custom_llm_provider=kwargs.get("custom_llm_provider", None),
                    api_base=kwargs.get("api_base", None),
                    api_key=kwargs.get("api_key", None),
                )
                print_verbose(
                    f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
                )
                logging_obj.update_environment_variables(
                    model=model,
                    user=kwargs.get("user", None),
                    optional_params={},
                    litellm_params={
                        "logger_fn": kwargs.get("logger_fn", None),
                        "acompletion": True,
                        "metadata": kwargs.get("metadata", {}),
                        "model_info": kwargs.get("model_info", {}),
                        "proxy_server_request": kwargs.get(
                            "proxy_server_request", None
                        ),
                        "preset_cache_key": kwargs.get("preset_cache_key", None),
                        "stream_response": kwargs.get("stream_response", {}),
                        "api_base": kwargs.get("api_base", ""),
                    },
                    input=kwargs.get("messages", ""),
                    api_key=kwargs.get("api_key", None),
                    original_response=str(cached_result),
                    additional_args=None,
                    stream=kwargs.get("stream", False),
                )
                call_type = original_function.__name__
                if call_type == CallTypes.acompletion.value and isinstance(
                    cached_result, dict
                ):
                    if kwargs.get("stream", False) is True:
                        cached_result = convert_to_streaming_response_async(
                            response_object=cached_result,
                        )
                        cached_result = CustomStreamWrapper(
                            completion_stream=cached_result,
                            model=model,
                            custom_llm_provider="cached_response",
                            logging_obj=logging_obj,
                        )
                    else:
                        cached_result = convert_to_model_response_object(
                            response_object=cached_result,
                            model_response_object=ModelResponse(),
                        )
                if call_type == CallTypes.atext_completion.value and isinstance(
                    cached_result, dict
                ):
                    if kwargs.get("stream", False) is True:
                        cached_result = convert_to_streaming_response_async(
                            response_object=cached_result,
                        )
                        cached_result = CustomStreamWrapper(
                            completion_stream=cached_result,
                            model=model,
                            custom_llm_provider="cached_response",
                            logging_obj=logging_obj,
                        )
                    else:
                        cached_result = TextCompletionResponse(**cached_result)
                elif call_type == CallTypes.aembedding.value and isinstance(
                    cached_result, dict
                ):
                    cached_result = convert_to_model_response_object(
                        response_object=cached_result,
                        model_response_object=EmbeddingResponse(),
                        response_type="embedding",
                    )
                elif call_type == CallTypes.arerank.value and isinstance(
                    cached_result, dict
                ):
                    cached_result = convert_to_model_response_object(
                        response_object=cached_result,
                        model_response_object=None,
                        response_type="rerank",
                    )
                elif call_type == CallTypes.atranscription.value and isinstance(
                    cached_result, dict
                ):
                    hidden_params = {
                        "model": "whisper-1",
                        "custom_llm_provider": custom_llm_provider,
                        "cache_hit": True,
                    }
                    cached_result = convert_to_model_response_object(
                        response_object=cached_result,
                        model_response_object=TranscriptionResponse(),
                        response_type="audio_transcription",
                        hidden_params=hidden_params,
                    )
                if kwargs.get("stream", False) is False:
                    # LOG SUCCESS
                    asyncio.create_task(
                        logging_obj.async_success_handler(
                            cached_result, start_time, end_time, cache_hit
                        )
                    )
                    threading.Thread(
                        target=logging_obj.success_handler,
                        args=(cached_result, start_time, end_time, cache_hit),
                    ).start()
                cache_key = kwargs.get("preset_cache_key", None)
                if (
                    isinstance(cached_result, BaseModel)
                    or isinstance(cached_result, CustomStreamWrapper)
                ) and hasattr(cached_result, "_hidden_params"):
                    cached_result._hidden_params["cache_key"] = cache_key  # type: ignore
                return CachingHandlerResponse(cached_result=cached_result)
            elif (
                call_type == CallTypes.aembedding.value
                and cached_result is not None
                and isinstance(cached_result, list)
                and litellm.cache is not None
                and not isinstance(
                    litellm.cache.cache, S3Cache
                )  # s3 doesn't support bulk writing. Exclude.
            ):
                remaining_list = []
                non_null_list = []
                for idx, cr in enumerate(cached_result):
                    if cr is None:
                        remaining_list.append(kwargs["input"][idx])
                    else:
                        non_null_list.append((idx, cr))
                original_kwargs_input = kwargs["input"]
                kwargs["input"] = remaining_list
                if len(non_null_list) > 0:
                    print_verbose(f"EMBEDDING CACHE HIT! - {len(non_null_list)}")
                    final_embedding_cached_response = EmbeddingResponse(
                        model=kwargs.get("model"),
                        data=[None] * len(original_kwargs_input),
                    )
                    final_embedding_cached_response._hidden_params["cache_hit"] = (
                        True
                    )

                    for val in non_null_list:
                        idx, cr = val  # (idx, cr) tuple
                        if cr is not None:
                            final_embedding_cached_response.data[idx] = Embedding(
                                embedding=cr["embedding"],
                                index=idx,
                                object="embedding",
                            )
                if len(remaining_list) == 0:
                    # LOG SUCCESS
                    cache_hit = True
                    end_time = datetime.datetime.now()
                    (
                        model,
                        custom_llm_provider,
                        dynamic_api_key,
                        api_base,
                    ) = litellm.get_llm_provider(
                        model=model,
                        custom_llm_provider=kwargs.get("custom_llm_provider", None),
                        api_base=kwargs.get("api_base", None),
                        api_key=kwargs.get("api_key", None),
                    )
                    print_verbose(
                        f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
                    )
                    logging_obj.update_environment_variables(
                        model=model,
                        user=kwargs.get("user", None),
                        optional_params={},
                        litellm_params={
                            "logger_fn": kwargs.get("logger_fn", None),
                            "acompletion": True,
                            "metadata": kwargs.get("metadata", {}),
                            "model_info": kwargs.get("model_info", {}),
                            "proxy_server_request": kwargs.get(
                                "proxy_server_request", None
                            ),
                            "preset_cache_key": kwargs.get(
                                "preset_cache_key", None
                            ),
                            "stream_response": kwargs.get("stream_response", {}),
                            "api_base": "",
                        },
                        input=kwargs.get("messages", ""),
                        api_key=kwargs.get("api_key", None),
                        original_response=str(final_embedding_cached_response),
                        additional_args=None,
                        stream=kwargs.get("stream", False),
                    )
                    asyncio.create_task(
                        logging_obj.async_success_handler(
                            final_embedding_cached_response,
                            start_time,
                            end_time,
                            cache_hit,
                        )
                    )
                    threading.Thread(
                        target=logging_obj.success_handler,
                        args=(
                            final_embedding_cached_response,
                            start_time,
                            end_time,
                            cache_hit,
                        ),
                    ).start()
                    return CachingHandlerResponse(
                        final_embedding_cached_response=final_embedding_cached_response,
                        embedding_all_elements_cache_hit=True,
                    )
        return CachingHandlerResponse(
            cached_result=cached_result,
            final_embedding_cached_response=final_embedding_cached_response,
        )

    async def _async_set_cache(
        self,
        result: Any,
        original_function: Callable,
        kwargs: Dict[str, Any],
        args: Optional[Tuple[Any, ...]] = None,
    ):
        """
        Internal method to check the type of the result & cache used and adds the result to the cache accordingly

        Args:
            result: Any:
            original_function: Callable:
            kwargs: Dict[str, Any]:
            args: Optional[Tuple[Any, ...]] = None:

        Returns:
            None
        Raises:
            None
        """
        args = args or ()
        # [OPTIONAL] ADD TO CACHE
        if (
            (litellm.cache is not None)
            and litellm.cache.supported_call_types is not None
            and (str(original_function.__name__) in litellm.cache.supported_call_types)
            and (kwargs.get("cache", {}).get("no-store", False) is not True)
        ):
            if (
                isinstance(result, litellm.ModelResponse)
                or isinstance(result, litellm.EmbeddingResponse)
                or isinstance(result, TranscriptionResponse)
                or isinstance(result, RerankResponse)
            ):
                if (
                    isinstance(result, EmbeddingResponse)
                    and isinstance(kwargs["input"], list)
                    and litellm.cache is not None
                    and not isinstance(
                        litellm.cache.cache, S3Cache
                    )  # s3 doesn't support bulk writing. Exclude.
                ):
                    asyncio.create_task(
                        litellm.cache.async_add_cache_pipeline(result, *args, **kwargs)
                    )
                elif isinstance(litellm.cache.cache, S3Cache):
                    threading.Thread(
                        target=litellm.cache.add_cache,
                        args=(result,) + args,
                        kwargs=kwargs,
                    ).start()
                else:
                    asyncio.create_task(
                        litellm.cache.async_add_cache(result.json(), *args, **kwargs)
                    )
            else:
                asyncio.create_task(
                    litellm.cache.async_add_cache(result, *args, **kwargs)
                )
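Taken together, the new `litellm/caching/caching_handler.py` pulls the cache lookup/write logic out of `utils.py`: the async wrapper asks `LLMCachingHandler` for a cached result before dispatching the call, and hands the final response back to it afterwards. A rough sketch of that flow, using only the signatures shown above (the surrounding wrapper, its variable names, and the logging object are assumptions, not the actual `utils.py` code):

```python
import datetime

from litellm.caching.caching_handler import LLMCachingHandler

_caching_handler = LLMCachingHandler()


async def _wrapped_async_call(original_function, logging_obj, *args, **kwargs):
    # hypothetical wrapper; utils.py wires this up differently in detail
    start_time = datetime.datetime.now()
    caching_response = await _caching_handler._async_get_cache(
        model=kwargs.get("model", ""),
        original_function=original_function,
        logging_obj=logging_obj,
        start_time=start_time,
        call_type=original_function.__name__,
        kwargs=kwargs,
        args=args,
    )
    # full cache hit for completion / text_completion / transcription
    if caching_response.cached_result is not None:
        return caching_response.cached_result
    # every embedding input was cached, so no API call is needed
    if caching_response.embedding_all_elements_cache_hit:
        return caching_response.final_embedding_cached_response

    result = await original_function(*args, **kwargs)
    await _caching_handler._async_set_cache(
        result=result,
        original_function=original_function,
        kwargs=kwargs,
        args=args,
    )
    return result
```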
@@ -43,7 +43,7 @@
 # ### REDIS
 # # if len(os.getenv("REDIS_HOST", "")) > 0 and len(os.getenv("REDIS_PORT", "")) > 0 and len(os.getenv("REDIS_PASSWORD", "")) > 0:
 # # print(f"redis host: {os.getenv('REDIS_HOST')}; redis port: {os.getenv('REDIS_PORT')}; password: {os.getenv('REDIS_PASSWORD')}")
-# # from litellm.caching import Cache
+# # from litellm.caching.caching import Cache
 # # litellm.cache = Cache(type="redis", host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"), password=os.getenv("REDIS_PASSWORD"))
 # # print("\033[92mLiteLLM: Switched on Redis caching\033[0m")
@@ -21,7 +21,7 @@ import litellm.litellm_core_utils
 import litellm.litellm_core_utils.litellm_logging
 import litellm.types
 from litellm._logging import verbose_logger, verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_batch_logger import CustomBatchLogger
 from litellm.litellm_core_utils.exception_mapping_utils import (
     _add_key_name_and_team_to_alert,
@@ -13,7 +13,7 @@ import requests

 import litellm
 from litellm._logging import verbose_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.types.utils import StandardLoggingPayload
@@ -29,14 +29,30 @@ def create_client():
     clickhouse_host = os.getenv("CLICKHOUSE_HOST")
     if clickhouse_host is not None:
         verbose_logger.debug("setting up clickhouse")

+        port = os.getenv("CLICKHOUSE_PORT")
         if port is not None and isinstance(port, str):
             port = int(port)

+        host: Optional[str] = os.getenv("CLICKHOUSE_HOST")
+        if host is None:
+            raise ValueError("CLICKHOUSE_HOST is not set")
+
+        username: Optional[str] = os.getenv("CLICKHOUSE_USERNAME")
+        if username is None:
+            raise ValueError("CLICKHOUSE_USERNAME is not set")
+
+        password: Optional[str] = os.getenv("CLICKHOUSE_PASSWORD")
+        if password is None:
+            raise ValueError("CLICKHOUSE_PASSWORD is not set")
+        if port is None:
+            raise ValueError("CLICKHOUSE_PORT is not set")
+
         client = clickhouse_connect.get_client(
-            host=os.getenv("CLICKHOUSE_HOST"),
+            host=host,
             port=port,
-            username=os.getenv("CLICKHOUSE_USERNAME"),
+            username=username,
-            password=os.getenv("CLICKHOUSE_PASSWORD"),
+            password=password,
         )
         return client
     else:
@@ -176,11 +192,29 @@ def _start_clickhouse():
     if port is not None and isinstance(port, str):
         port = int(port)

+    port = os.getenv("CLICKHOUSE_PORT")
+    if port is not None and isinstance(port, str):
+        port = int(port)
+
+    host: Optional[str] = os.getenv("CLICKHOUSE_HOST")
+    if host is None:
+        raise ValueError("CLICKHOUSE_HOST is not set")
+
+    username: Optional[str] = os.getenv("CLICKHOUSE_USERNAME")
+    if username is None:
+        raise ValueError("CLICKHOUSE_USERNAME is not set")
+
+    password: Optional[str] = os.getenv("CLICKHOUSE_PASSWORD")
+    if password is None:
+        raise ValueError("CLICKHOUSE_PASSWORD is not set")
+    if port is None:
+        raise ValueError("CLICKHOUSE_PORT is not set")
+
     client = clickhouse_connect.get_client(
-        host=os.getenv("CLICKHOUSE_HOST"),
+        host=host,
         port=port,
-        username=os.getenv("CLICKHOUSE_USERNAME"),
+        username=username,
-        password=os.getenv("CLICKHOUSE_PASSWORD"),
+        password=password,
     )
     # view all tables in DB
     response = client.query("SHOW TABLES")
@@ -241,11 +275,25 @@ class ClickhouseLogger:
         if port is not None and isinstance(port, str):
             port = int(port)

+        host: Optional[str] = os.getenv("CLICKHOUSE_HOST")
+        if host is None:
+            raise ValueError("CLICKHOUSE_HOST is not set")
+
+        username: Optional[str] = os.getenv("CLICKHOUSE_USERNAME")
+        if username is None:
+            raise ValueError("CLICKHOUSE_USERNAME is not set")
+
+        password: Optional[str] = os.getenv("CLICKHOUSE_PASSWORD")
+        if password is None:
+            raise ValueError("CLICKHOUSE_PASSWORD is not set")
+        if port is None:
+            raise ValueError("CLICKHOUSE_PORT is not set")
+
         client = clickhouse_connect.get_client(
-            host=os.getenv("CLICKHOUSE_HOST"),
+            host=host,
             port=port,
-            username=os.getenv("CLICKHOUSE_USERNAME"),
+            username=username,
-            password=os.getenv("CLICKHOUSE_PASSWORD"),
+            password=password,
         )
         self.client = client
@@ -8,7 +8,7 @@ from typing import Any, Literal, Optional, Tuple, Union
 import dotenv
 from pydantic import BaseModel

-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.types.llms.openai import ChatCompletionRequest
 from litellm.types.services import ServiceLoggerPayload
@@ -23,7 +23,7 @@ from litellm import (
     turn_off_message_logging,
     verbose_logger,
 )
-from litellm.caching import DualCache, InMemoryCache, S3Cache
+from litellm.caching.caching import DualCache, InMemoryCache, S3Cache
 from litellm.cost_calculator import _select_model_name_for_cost_calc
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.integrations.custom_logger import CustomLogger
@@ -10,7 +10,7 @@ from openai import AsyncAzureOpenAI, AzureOpenAI
 from typing_extensions import overload

 import litellm
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.types.utils import EmbeddingResponse
@@ -6,7 +6,7 @@ from typing import Dict, List, Optional, Tuple
 import httpx

 from litellm._logging import verbose_logger
-from litellm.caching import DualCache, InMemoryCache
+from litellm.caching.caching import DualCache, InMemoryCache
 from litellm.secret_managers.main import get_secret

 from .base import BaseLLM
@@ -29,7 +29,7 @@ import requests # type: ignore

 import litellm
 from litellm import verbose_logger
-from litellm.caching import InMemoryCache
+from litellm.caching.caching import InMemoryCache
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.litellm_core_utils.litellm_logging import Logging
 from litellm.llms.custom_httpx.http_handler import (
@@ -8,7 +8,7 @@ from httpx import Response

 import litellm
 from litellm import verbose_logger
-from litellm.caching import InMemoryCache
+from litellm.caching.caching import InMemoryCache
 from litellm.llms.custom_httpx.http_handler import (
     _get_httpx_client,
     get_async_httpx_client,
@@ -4,7 +4,7 @@ from typing import Callable, List, Literal, Optional, Tuple, Union
 import httpx

 import litellm
-from litellm.caching import Cache
+from litellm.caching.caching import Cache
 from litellm.litellm_core_utils.litellm_logging import Logging
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.llms.OpenAI.openai import AllMessageValues
@@ -65,7 +65,7 @@ from litellm.utils import (
 )

 from ._logging import verbose_logger
-from .caching import disable_cache, enable_cache, update_cache
+from .caching.caching import disable_cache, enable_cache, update_cache
 from .llms import (
     aleph_alpha,
     baseten,
@ -16,7 +16,7 @@ from pydantic import BaseModel
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
from litellm._logging import verbose_proxy_logger
|
from litellm._logging import verbose_proxy_logger
|
||||||
from litellm.caching import DualCache
|
from litellm.caching.caching import DualCache
|
||||||
from litellm.proxy._types import (
|
from litellm.proxy._types import (
|
||||||
LiteLLM_EndUserTable,
|
LiteLLM_EndUserTable,
|
||||||
LiteLLM_JWTAuth,
|
LiteLLM_JWTAuth,
|
||||||
|
|
|
@ -15,7 +15,7 @@ from cryptography.hazmat.backends import default_backend
|
||||||
from cryptography.hazmat.primitives import serialization
|
from cryptography.hazmat.primitives import serialization
|
||||||
|
|
||||||
from litellm._logging import verbose_proxy_logger
|
from litellm._logging import verbose_proxy_logger
|
||||||
from litellm.caching import DualCache
|
from litellm.caching.caching import DualCache
|
||||||
from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
|
from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
|
||||||
from litellm.proxy._types import LiteLLM_JWTAuth, LiteLLM_UserTable
|
from litellm.proxy._types import LiteLLM_JWTAuth, LiteLLM_UserTable
|
||||||
from litellm.proxy.utils import PrismaClient
|
from litellm.proxy.utils import PrismaClient
|
||||||
|
|
|
@ -5,7 +5,7 @@ from fastapi import APIRouter, Depends, HTTPException, Request
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
from litellm._logging import verbose_proxy_logger
|
from litellm._logging import verbose_proxy_logger
|
||||||
from litellm.caching import RedisCache
|
from litellm.caching.caching import RedisCache
|
||||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||||
|
|
||||||
router = APIRouter(
|
router = APIRouter(
|
||||||
|
|
|
@@ -2,7 +2,7 @@ from typing import Any, Dict, List, Literal, Optional, Union
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata

@@ -25,7 +25,7 @@ from fastapi import HTTPException
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.litellm_core_utils.logging_utils import (
     convert_litellm_response_object_to_str,

@@ -25,7 +25,7 @@ from fastapi import HTTPException
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.litellm_core_utils.logging_utils import (
     convert_litellm_response_object_to_str,

@@ -2,7 +2,7 @@ from typing import Any, Dict, List, Literal, Optional, Union
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata

@@ -21,7 +21,7 @@ from pydantic import BaseModel
 import litellm  # noqa: E401
 from litellm import get_secret
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.utils import (
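Across the proxy, guardrail, and hook files above the change is purely mechanical: the flat `litellm/caching.py` module has become a `litellm/caching/` package whose classes now live in `caching/caching.py`. A minimal sketch of the migration pattern (the exact symbols imported vary per file):

```python
# Before this commit (flat module):
# from litellm.caching import Cache, DualCache, RedisCache

# After this commit (package path):
from litellm.caching.caching import Cache, DualCache, RedisCache

# Call sites are otherwise untouched, e.g.
user_api_key_cache = DualCache()
```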
@@ -465,7 +465,7 @@ async def health_readiness():
     # check Cache
     cache_type = None
     if litellm.cache is not None:
-        from litellm.caching import RedisSemanticCache
+        from litellm.caching.caching import RedisSemanticCache

         cache_type = litellm.cache.type
@@ -7,7 +7,7 @@ from fastapi import HTTPException
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth

@@ -11,7 +11,7 @@ from fastapi import HTTPException
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache, InMemoryCache, RedisCache
+from litellm.caching.caching import DualCache, InMemoryCache, RedisCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth

@@ -7,7 +7,7 @@ from fastapi import HTTPException
 import litellm
 from litellm import verbose_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth

@@ -14,7 +14,7 @@ from fastapi import HTTPException
 import litellm
 from litellm import ModelResponse, Router
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.types.router import ModelGroupInfo

@@ -4,7 +4,7 @@ from fastapi import HTTPException
 import litellm
 from litellm import verbose_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth

@@ -9,7 +9,7 @@ from pydantic import BaseModel
 import litellm
 from litellm import ModelResponse
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
 from litellm.proxy._types import CurrentItemRateLimit, UserAPIKeyAuth

@@ -19,7 +19,7 @@ from fastapi import HTTPException
 import litellm  # noqa: E401
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.utils import (

@@ -18,7 +18,7 @@ from typing_extensions import overload
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.llms.prompt_templates.factory import prompt_injection_detection_default_pt
 from litellm.proxy._types import LiteLLMPromptInjectionParams, UserAPIKeyAuth

@@ -112,7 +112,7 @@ from litellm import (
     RetrieveBatchRequest,
 )
 from litellm._logging import verbose_proxy_logger, verbose_router_logger
-from litellm.caching import DualCache, RedisCache
+from litellm.caching.caching import DualCache, RedisCache
 from litellm.exceptions import RejectedRequestError
 from litellm.integrations.SlackAlerting.slack_alerting import SlackAlerting
 from litellm.litellm_core_utils.core_helpers import (
@@ -1554,7 +1554,7 @@ class ProxyConfig:
             for key, value in litellm_settings.items():
                 if key == "cache" and value is True:
                     print(f"{blue_color_code}\nSetting Cache on Proxy")  # noqa
-                    from litellm.caching import Cache
+                    from litellm.caching.caching import Cache

                     cache_params = {}
                     if "cache_params" in litellm_settings:
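The ProxyConfig hunk above only swaps the import inside the `cache: true` branch; the surrounding behaviour stays the same. A rough, hedged sketch of what that branch amounts to when the proxy config enables the cache (the `cache_params` keys shown are illustrative, not taken from this diff):

```python
import litellm
from litellm.caching.caching import Cache

# hypothetical parsed proxy config
litellm_settings = {
    "cache": True,
    "cache_params": {"type": "redis", "host": "localhost", "port": 6379},
}

if litellm_settings.get("cache") is True:
    cache_params = {}
    if "cache_params" in litellm_settings:
        cache_params = litellm_settings["cache_params"]
    litellm.cache = Cache(**cache_params)  # proxy-wide response cache
```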
@@ -49,7 +49,7 @@ from litellm import (
 )
 from litellm._logging import verbose_proxy_logger
 from litellm._service_logger import ServiceLogging, ServiceTypes
-from litellm.caching import DualCache, RedisCache
+from litellm.caching.caching import DualCache, RedisCache
 from litellm.exceptions import RejectedRequestError
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.integrations.custom_logger import CustomLogger

@@ -37,7 +37,7 @@ import litellm.litellm_core_utils.exception_mapping_utils
 from litellm import get_secret_str
 from litellm._logging import verbose_router_logger
 from litellm.assistants.main import AssistantDeleted
-from litellm.caching import DualCache, InMemoryCache, RedisCache
+from litellm.caching.caching import DualCache, InMemoryCache, RedisCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
 from litellm.llms.AzureOpenAI.azure import get_azure_ad_token_from_oidc

@@ -14,7 +14,7 @@ from typing import Optional
 import dotenv  # type: ignore
 import requests

-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger

@@ -9,7 +9,7 @@ from pydantic import BaseModel
 import litellm
 from litellm import ModelResponse, token_counter, verbose_logger
 from litellm._logging import verbose_router_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger

@@ -9,7 +9,7 @@ from pydantic import BaseModel
 import litellm
 from litellm import ModelResponse, token_counter, verbose_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger

@@ -12,7 +12,7 @@ from pydantic import BaseModel
 from litellm import token_counter
 from litellm._logging import verbose_router_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.utils import print_verbose

@@ -10,7 +10,7 @@ from pydantic import BaseModel
 import litellm
 from litellm import token_counter
 from litellm._logging import verbose_logger, verbose_router_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.types.router import RouterErrors
 from litellm.utils import get_utc_datetime, print_verbose

@@ -7,7 +7,7 @@ import time
 from typing import List, Optional, Tuple, TypedDict

 from litellm import verbose_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache


 class CooldownCacheValue(TypedDict):
@@ -1,9 +1,11 @@
-import heapq
-from pydantic import BaseModel
-from typing import Optional
 import enum
-from litellm.caching import DualCache, RedisCache
+import heapq
+from typing import Optional
+
+from pydantic import BaseModel
+
 from litellm import print_verbose
+from litellm.caching.caching import DualCache, RedisCache


 class SchedulerCacheKeys(enum.Enum):
@@ -4,7 +4,7 @@ from typing import Optional
 import litellm
 from litellm._logging import verbose_logger
-from litellm.caching import InMemoryCache
+from litellm.caching.caching import InMemoryCache
 from litellm.integrations.gcs_bucket.gcs_bucket_base import GCSBucketBase
 from litellm.llms.custom_httpx.http_handler import _get_httpx_client
 from litellm.proxy._types import CommonProxyErrors, KeyManagementSystem

@@ -12,7 +12,7 @@ from dotenv import load_dotenv
 import litellm
 from litellm._logging import print_verbose, verbose_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.proxy._types import KeyManagementSystem
397 litellm/utils.py
@@ -56,7 +56,10 @@ import litellm._service_logger  # for storing API inputs, outputs, and metadata
 import litellm.litellm_core_utils
 import litellm.litellm_core_utils.audio_utils.utils
 import litellm.litellm_core_utils.json_validation_rule
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
+from litellm.caching.caching_handler import CachingHandlerResponse, LLMCachingHandler
+
+_llm_caching_handler = LLMCachingHandler()
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.litellm_core_utils.exception_mapping_utils import (
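The hunk above is the core of the refactor in `litellm/utils.py`: a single module-level `LLMCachingHandler` replaces the inline cache logic that used to live inside the `@client` wrapper. A sketch of the wiring, using only names that appear in this diff:

```python
from litellm.caching.caching import DualCache
from litellm.caching.caching_handler import CachingHandlerResponse, LLMCachingHandler

# one shared handler; the async wrapper below delegates both the lookup
# (_async_get_cache) and the write-back (_async_set_cache) to it
_llm_caching_handler = LLMCachingHandler()
```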
@@ -146,7 +149,13 @@ from typing import (
 from openai import OpenAIError as OriginalError

 from ._logging import verbose_logger
-from .caching import Cache, QdrantSemanticCache, RedisCache, RedisSemanticCache, S3Cache
+from .caching.caching import (
+    Cache,
+    QdrantSemanticCache,
+    RedisCache,
+    RedisSemanticCache,
+    S3Cache,
+)
 from .exceptions import (
     APIConnectionError,
     APIError,
@@ -1121,299 +1130,26 @@ def client(original_function):
     print_verbose(
         f"ASYNC kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache'): {kwargs.get('cache', None)}"
     )
-    # if caching is false, don't run this
-    final_embedding_cached_response = None
-    if (
-        (kwargs.get("caching", None) is None and litellm.cache is not None)
-        or kwargs.get("caching", False) is True
-    ) and (
-        kwargs.get("cache", {}).get("no-cache", False) is not True
-    ):  # allow users to control returning cached responses from the completion function
-        # checking cache
-        print_verbose("INSIDE CHECKING CACHE")
-        if (
-            litellm.cache is not None
-            and litellm.cache.supported_call_types is not None
-            and str(original_function.__name__)
-            in litellm.cache.supported_call_types
-        ):
-            print_verbose("Checking Cache")
-            if call_type == CallTypes.aembedding.value and isinstance(
-                kwargs["input"], list
-            ):
-                tasks = []
-                for idx, i in enumerate(kwargs["input"]):
-                    preset_cache_key = litellm.cache.get_cache_key(
-                        *args, **{**kwargs, "input": i}
-                    )
-                    tasks.append(
-                        litellm.cache.async_get_cache(
-                            cache_key=preset_cache_key
-                        )
-                    )
-                cached_result = await asyncio.gather(*tasks)
-                ## check if cached result is None ##
-                if cached_result is not None and isinstance(
-                    cached_result, list
-                ):
-                    if len(cached_result) == 1 and cached_result[0] is None:
-                        cached_result = None
-            elif isinstance(
-                litellm.cache.cache, RedisSemanticCache
-            ) or isinstance(litellm.cache.cache, RedisCache):
-                preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
-                kwargs["preset_cache_key"] = (
-                    preset_cache_key  # for streaming calls, we need to pass the preset_cache_key
-                )
-                cached_result = await litellm.cache.async_get_cache(
-                    *args, **kwargs
-                )
-            elif isinstance(litellm.cache.cache, QdrantSemanticCache):
-                preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
-                kwargs["preset_cache_key"] = (
-                    preset_cache_key  # for streaming calls, we need to pass the preset_cache_key
-                )
-                cached_result = await litellm.cache.async_get_cache(
-                    *args, **kwargs
-                )
-            else:  # for s3 caching. [NOT RECOMMENDED IN PROD - this will slow down responses since boto3 is sync]
-                preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
-                kwargs["preset_cache_key"] = (
-                    preset_cache_key  # for streaming calls, we need to pass the preset_cache_key
-                )
-                cached_result = litellm.cache.get_cache(*args, **kwargs)
-            if cached_result is not None and not isinstance(
-                cached_result, list
-            ):
-                print_verbose("Cache Hit!", log_level="INFO")
-                cache_hit = True
-                end_time = datetime.datetime.now()
-                (
-                    model,
-                    custom_llm_provider,
-                    dynamic_api_key,
-                    api_base,
-                ) = litellm.get_llm_provider(
-                    model=model,
-                    custom_llm_provider=kwargs.get("custom_llm_provider", None),
-                    api_base=kwargs.get("api_base", None),
-                    api_key=kwargs.get("api_key", None),
-                )
-                print_verbose(
-                    f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
-                )
-                logging_obj.update_environment_variables(
-                    model=model,
-                    user=kwargs.get("user", None),
-                    optional_params={},
-                    litellm_params={
-                        "logger_fn": kwargs.get("logger_fn", None),
-                        "acompletion": True,
-                        "metadata": kwargs.get("metadata", {}),
-                        "model_info": kwargs.get("model_info", {}),
-                        "proxy_server_request": kwargs.get(
-                            "proxy_server_request", None
-                        ),
-                        "preset_cache_key": kwargs.get(
-                            "preset_cache_key", None
-                        ),
-                        "stream_response": kwargs.get("stream_response", {}),
-                        "api_base": kwargs.get("api_base", ""),
-                    },
-                    input=kwargs.get("messages", ""),
-                    api_key=kwargs.get("api_key", None),
-                    original_response=str(cached_result),
-                    additional_args=None,
-                    stream=kwargs.get("stream", False),
-                )
-                call_type = original_function.__name__
-                if call_type == CallTypes.acompletion.value and isinstance(
-                    cached_result, dict
-                ):
-                    if kwargs.get("stream", False) is True:
-                        cached_result = convert_to_streaming_response_async(
-                            response_object=cached_result,
-                        )
-                        cached_result = CustomStreamWrapper(
-                            completion_stream=cached_result,
-                            model=model,
-                            custom_llm_provider="cached_response",
-                            logging_obj=logging_obj,
-                        )
-                    else:
-                        cached_result = convert_to_model_response_object(
-                            response_object=cached_result,
-                            model_response_object=ModelResponse(),
-                        )
-                if (
-                    call_type == CallTypes.atext_completion.value
-                    and isinstance(cached_result, dict)
-                ):
-                    if kwargs.get("stream", False) is True:
-                        cached_result = convert_to_streaming_response_async(
-                            response_object=cached_result,
-                        )
-                        cached_result = CustomStreamWrapper(
-                            completion_stream=cached_result,
-                            model=model,
-                            custom_llm_provider="cached_response",
-                            logging_obj=logging_obj,
-                        )
-                    else:
-                        cached_result = TextCompletionResponse(**cached_result)
-                elif call_type == CallTypes.aembedding.value and isinstance(
-                    cached_result, dict
-                ):
-                    cached_result = convert_to_model_response_object(
-                        response_object=cached_result,
-                        model_response_object=EmbeddingResponse(),
-                        response_type="embedding",
-                    )
-                elif call_type == CallTypes.arerank.value and isinstance(
-                    cached_result, dict
-                ):
-                    cached_result = convert_to_model_response_object(
-                        response_object=cached_result,
-                        model_response_object=None,
-                        response_type="rerank",
-                    )
-                elif call_type == CallTypes.atranscription.value and isinstance(
-                    cached_result, dict
-                ):
-                    hidden_params = {
-                        "model": "whisper-1",
-                        "custom_llm_provider": custom_llm_provider,
-                        "cache_hit": True,
-                    }
-                    cached_result = convert_to_model_response_object(
-                        response_object=cached_result,
-                        model_response_object=TranscriptionResponse(),
-                        response_type="audio_transcription",
-                        hidden_params=hidden_params,
-                    )
-                if kwargs.get("stream", False) is False:
-                    # LOG SUCCESS
-                    asyncio.create_task(
-                        logging_obj.async_success_handler(
-                            cached_result, start_time, end_time, cache_hit
-                        )
-                    )
-                    threading.Thread(
-                        target=logging_obj.success_handler,
-                        args=(cached_result, start_time, end_time, cache_hit),
-                    ).start()
-                cache_key = kwargs.get("preset_cache_key", None)
-                if (
-                    isinstance(cached_result, BaseModel)
-                    or isinstance(cached_result, CustomStreamWrapper)
-                ) and hasattr(cached_result, "_hidden_params"):
-                    cached_result._hidden_params["cache_key"] = cache_key  # type: ignore
-                return cached_result
-            elif (
-                call_type == CallTypes.aembedding.value
-                and cached_result is not None
-                and isinstance(cached_result, list)
-                and litellm.cache is not None
-                and not isinstance(
-                    litellm.cache.cache, S3Cache
-                )  # s3 doesn't support bulk writing. Exclude.
-            ):
-                remaining_list = []
-                non_null_list = []
-                for idx, cr in enumerate(cached_result):
-                    if cr is None:
-                        remaining_list.append(kwargs["input"][idx])
-                    else:
-                        non_null_list.append((idx, cr))
-                original_kwargs_input = kwargs["input"]
-                kwargs["input"] = remaining_list
-                if len(non_null_list) > 0:
-                    print_verbose(
-                        f"EMBEDDING CACHE HIT! - {len(non_null_list)}"
-                    )
-                    final_embedding_cached_response = EmbeddingResponse(
-                        model=kwargs.get("model"),
-                        data=[None] * len(original_kwargs_input),
-                    )
-                    final_embedding_cached_response._hidden_params[
-                        "cache_hit"
-                    ] = True
-
-                    for val in non_null_list:
-                        idx, cr = val  # (idx, cr) tuple
-                        if cr is not None:
-                            final_embedding_cached_response.data[idx] = (
-                                Embedding(
-                                    embedding=cr["embedding"],
-                                    index=idx,
-                                    object="embedding",
-                                )
-                            )
-                if len(remaining_list) == 0:
-                    # LOG SUCCESS
-                    cache_hit = True
-                    end_time = datetime.datetime.now()
-                    (
-                        model,
-                        custom_llm_provider,
-                        dynamic_api_key,
-                        api_base,
-                    ) = litellm.get_llm_provider(
-                        model=model,
-                        custom_llm_provider=kwargs.get(
-                            "custom_llm_provider", None
-                        ),
-                        api_base=kwargs.get("api_base", None),
-                        api_key=kwargs.get("api_key", None),
-                    )
-                    print_verbose(
-                        f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
-                    )
-                    logging_obj.update_environment_variables(
-                        model=model,
-                        user=kwargs.get("user", None),
-                        optional_params={},
-                        litellm_params={
-                            "logger_fn": kwargs.get("logger_fn", None),
-                            "acompletion": True,
-                            "metadata": kwargs.get("metadata", {}),
-                            "model_info": kwargs.get("model_info", {}),
-                            "proxy_server_request": kwargs.get(
-                                "proxy_server_request", None
-                            ),
-                            "preset_cache_key": kwargs.get(
-                                "preset_cache_key", None
-                            ),
-                            "stream_response": kwargs.get(
-                                "stream_response", {}
-                            ),
-                            "api_base": "",
-                        },
-                        input=kwargs.get("messages", ""),
-                        api_key=kwargs.get("api_key", None),
-                        original_response=str(final_embedding_cached_response),
-                        additional_args=None,
-                        stream=kwargs.get("stream", False),
-                    )
-                    asyncio.create_task(
-                        logging_obj.async_success_handler(
-                            final_embedding_cached_response,
-                            start_time,
-                            end_time,
-                            cache_hit,
-                        )
-                    )
-                    threading.Thread(
-                        target=logging_obj.success_handler,
-                        args=(
-                            final_embedding_cached_response,
-                            start_time,
-                            end_time,
-                            cache_hit,
-                        ),
-                    ).start()
-                    return final_embedding_cached_response
+    _caching_handler_response: CachingHandlerResponse = (
+        await _llm_caching_handler._async_get_cache(
+            model=model,
+            original_function=original_function,
+            logging_obj=logging_obj,
+            start_time=start_time,
+            call_type=call_type,
+            kwargs=kwargs,
+            args=args,
+        )
+    )
+    if (
+        _caching_handler_response.cached_result is not None
+        and _caching_handler_response.final_embedding_cached_response is None
+    ):
+        return _caching_handler_response.cached_result
+
+    elif _caching_handler_response.embedding_all_elements_cache_hit is True:
+        return _caching_handler_response.final_embedding_cached_response
+
     # MODEL CALL
     result = await original_function(*args, **kwargs)
     end_time = datetime.datetime.now()
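Roughly 270 lines of inline lookup, response reconstruction, and success logging collapse into one awaited call plus two early returns. A hedged sketch of the new control flow (this is not litellm's literal wrapper; the surrounding variables are assumed to exist as they do inside the `@client` decorator):

```python
async def _lookup_then_call(original_function, model, logging_obj, start_time, call_type, args, kwargs):
    # ask the handler for a cached answer first
    response: CachingHandlerResponse = await _llm_caching_handler._async_get_cache(
        model=model,
        original_function=original_function,
        logging_obj=logging_obj,
        start_time=start_time,
        call_type=call_type,
        kwargs=kwargs,
        args=args,
    )

    # full cache hit (completion / whole request): return the cached object
    if response.cached_result is not None and response.final_embedding_cached_response is None:
        return response.cached_result

    # batched embedding request where every element was cached
    if response.embedding_all_elements_cache_hit is True:
        return response.final_embedding_cached_response

    # partial or no hit: fall through to the real model call
    return await original_function(*args, **kwargs)
```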
@@ -1467,51 +1203,14 @@ def client(original_function):
         original_response=result, model=model, optional_params=kwargs
     )

-    # [OPTIONAL] ADD TO CACHE
-    if (
-        (litellm.cache is not None)
-        and litellm.cache.supported_call_types is not None
-        and (
-            str(original_function.__name__)
-            in litellm.cache.supported_call_types
-        )
-        and (kwargs.get("cache", {}).get("no-store", False) is not True)
-    ):
-        if (
-            isinstance(result, litellm.ModelResponse)
-            or isinstance(result, litellm.EmbeddingResponse)
-            or isinstance(result, TranscriptionResponse)
-            or isinstance(result, RerankResponse)
-        ):
-            if (
-                isinstance(result, EmbeddingResponse)
-                and isinstance(kwargs["input"], list)
-                and litellm.cache is not None
-                and not isinstance(
-                    litellm.cache.cache, S3Cache
-                )  # s3 doesn't support bulk writing. Exclude.
-            ):
-                asyncio.create_task(
-                    litellm.cache.async_add_cache_pipeline(
-                        result, *args, **kwargs
-                    )
-                )
-            elif isinstance(litellm.cache.cache, S3Cache):
-                threading.Thread(
-                    target=litellm.cache.add_cache,
-                    args=(result,) + args,
-                    kwargs=kwargs,
-                ).start()
-            else:
-                asyncio.create_task(
-                    litellm.cache.async_add_cache(
-                        result.json(), *args, **kwargs
-                    )
-                )
-        else:
-            asyncio.create_task(
-                litellm.cache.async_add_cache(result, *args, **kwargs)
-            )
+    ## Add response to cache
+    await _llm_caching_handler._async_set_cache(
+        result=result,
+        original_function=original_function,
+        kwargs=kwargs,
+        args=args,
+    )

     # LOG SUCCESS - handle streaming success logging in the _next_ object
     print_verbose(
         f"Async Wrapper: Completed Call, calling async_success_handler: {logging_obj.async_success_handler}"
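The write path gets the same treatment: the branching over `async_add_cache`, `async_add_cache_pipeline`, and the threaded S3 `add_cache` moves behind a single handler call. Presumably the handler now performs that dispatch internally; a hedged sketch of the caller's side only:

```python
async def _store_in_cache(result, original_function, args, kwargs):
    # caller no longer inspects litellm.cache.cache or the result type;
    # the handler owns the "pipeline vs. thread vs. task" decision (assumption)
    await _llm_caching_handler._async_set_cache(
        result=result,
        original_function=original_function,
        kwargs=kwargs,
        args=args,
    )
```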
@@ -1528,24 +1227,32 @@ def client(original_function):
     # REBUILD EMBEDDING CACHING
     if (
         isinstance(result, EmbeddingResponse)
-        and final_embedding_cached_response is not None
-        and final_embedding_cached_response.data is not None
+        and _caching_handler_response.final_embedding_cached_response
+        is not None
+        and _caching_handler_response.final_embedding_cached_response.data
+        is not None
     ):
         idx = 0
         final_data_list = []
-        for item in final_embedding_cached_response.data:
+        for (
+            item
+        ) in _caching_handler_response.final_embedding_cached_response.data:
             if item is None and result.data is not None:
                 final_data_list.append(result.data[idx])
                 idx += 1
             else:
                 final_data_list.append(item)

-        final_embedding_cached_response.data = final_data_list
-        final_embedding_cached_response._hidden_params["cache_hit"] = True
-        final_embedding_cached_response._response_ms = (
+        _caching_handler_response.final_embedding_cached_response.data = (
+            final_data_list
+        )
+        _caching_handler_response.final_embedding_cached_response._hidden_params[
+            "cache_hit"
+        ] = True
+        _caching_handler_response.final_embedding_cached_response._response_ms = (
             end_time - start_time
         ).total_seconds() * 1000
-        return final_embedding_cached_response
+        return _caching_handler_response.final_embedding_cached_response

     return result
 except Exception as e:
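The embedding-rebuild block above fills the `None` slots of a partially cached `EmbeddingResponse` with the freshly computed items, in order. The same merge, as a small self-contained sketch (the helper name is invented for illustration):

```python
from typing import Any, List, Optional

def merge_cached_and_fresh(cached: List[Optional[Any]], fresh: List[Any]) -> List[Any]:
    """Cached slots keep their value; None slots are filled from `fresh` in order."""
    merged, fresh_idx = [], 0
    for item in cached:
        if item is None:
            merged.append(fresh[fresh_idx])
            fresh_idx += 1
        else:
            merged.append(item)
    return merged

# e.g. merge_cached_and_fresh(["e1", None, "e3"], ["e2"]) == ["e1", "e2", "e3"]
```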
@@ -19,7 +19,7 @@ from litellm._logging import verbose_proxy_logger
 from litellm.proxy.utils import PrismaClient, ProxyLogging

 verbose_proxy_logger.setLevel(level=logging.DEBUG)
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.router import (
     Deployment,
     updateDeployment,
@@ -28,7 +28,7 @@ import pytest
 from openai import APIError

 import litellm
-from litellm.caching import DualCache, RedisCache
+from litellm.caching.caching import DualCache, RedisCache
 from litellm.integrations.SlackAlerting.slack_alerting import (
     DeploymentMetrics,
     SlackAlerting,

@@ -13,7 +13,7 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 import pytest, litellm
 from litellm.proxy.auth.auth_checks import get_end_user_object
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import LiteLLM_EndUserTable, LiteLLM_BudgetTable
 from litellm.proxy.utils import PrismaClient

@@ -21,7 +21,7 @@ import pytest
 import litellm
 from litellm import Router, mock_completion
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.utils import ProxyLogging

@@ -21,7 +21,7 @@ from litellm.proxy.enterprise.enterprise_hooks.banned_keywords import (
 from litellm import Router, mock_completion
 from litellm.proxy.utils import ProxyLogging, hash_token
 from litellm.proxy._types import UserAPIKeyAuth
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache


 @pytest.mark.asyncio

@@ -27,7 +27,7 @@ import pytest
 import litellm
 from litellm import Router, mock_completion
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.enterprise.enterprise_hooks.blocked_user_list import (
     _ENTERPRISE_BlockedUserList,

@@ -56,7 +56,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
 from starlette.datastructures import URL

-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import (
     BlockUsers,
     DynamoDBArgs,

@@ -21,7 +21,7 @@ import pytest
 import litellm
 from litellm import aembedding, completion, embedding
-from litellm.caching import Cache
+from litellm.caching.caching import Cache

 from unittest.mock import AsyncMock, patch, MagicMock
 import datetime
@@ -52,7 +52,7 @@ async def test_dual_cache_async_batch_get_cache():
     - hit redis for the other -> expect to return None
     - expect result = [in_memory_result, None]
     """
-    from litellm.caching import DualCache, InMemoryCache, RedisCache
+    from litellm.caching.caching import DualCache, InMemoryCache, RedisCache

     in_memory_cache = InMemoryCache()
     redis_cache = RedisCache()  # get credentials from environment

@@ -74,7 +74,7 @@ def test_dual_cache_batch_get_cache():
     - hit redis for the other -> expect to return None
     - expect result = [in_memory_result, None]
     """
-    from litellm.caching import DualCache, InMemoryCache, RedisCache
+    from litellm.caching.caching import DualCache, InMemoryCache, RedisCache

     in_memory_cache = InMemoryCache()
     redis_cache = RedisCache()  # get credentials from environment
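The two `test_dual_cache_*_batch_get_cache` tests only change their import; their scenario (one key served from memory, the other falling through to Redis and missing) is unchanged. A hedged sketch of that scenario — Redis credentials come from the environment, and the exact constructor/method signatures should be treated as illustrative rather than authoritative:

```python
from litellm.caching.caching import DualCache, InMemoryCache, RedisCache

in_memory_cache = InMemoryCache()
redis_cache = RedisCache()  # reads host/port/password from environment variables
dual_cache = DualCache(in_memory_cache=in_memory_cache, redis_cache=redis_cache)

in_memory_cache.set_cache(key="test_value", value="hello world")
# first key is found in memory; the second misses both layers
result = dual_cache.batch_get_cache(keys=["test_value", "test_value_2"])
assert result == ["hello world", None]
```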
@@ -520,6 +520,7 @@ async def test_embedding_caching_azure_individual_items_reordered():
     assert embedding_val_1[0]["id"] == embedding_val_2[0]["id"]
     ```
     """
+    litellm.set_verbose = True
     litellm.cache = Cache()
     common_msg = f"{uuid.uuid4()}"
     common_msg_2 = f"hey how's it going {uuid.uuid4()}"

@@ -532,9 +533,11 @@ async def test_embedding_caching_azure_individual_items_reordered():
     embedding_val_1 = await aembedding(
         model="azure/azure-embedding-model", input=embedding_1, caching=True
     )
+    print("embedding val 1", embedding_val_1)
     embedding_val_2 = await aembedding(
         model="azure/azure-embedding-model", input=embedding_2, caching=True
     )
+    print("embedding val 2", embedding_val_2)
     print(f"embedding_val_2._hidden_params: {embedding_val_2._hidden_params}")
     assert embedding_val_2._hidden_params["cache_hit"] == True
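The reordered-items test now enables verbose logging and prints both embedding responses before asserting the cache hit. A condensed, hedged usage sketch of what it verifies (the inputs are illustrative; the model name and the `_hidden_params["cache_hit"]` check come from the test itself):

```python
import litellm
from litellm import aembedding
from litellm.caching.caching import Cache

async def demo_reordered_embedding_cache():
    litellm.cache = Cache()  # default in-memory cache
    first_batch = ["common message", "hey how's it going"]
    second_batch = ["hey how's it going", "common message"]  # same items, reordered

    await aembedding(model="azure/azure-embedding-model", input=first_batch, caching=True)
    val_2 = await aembedding(model="azure/azure-embedding-model", input=second_batch, caching=True)
    assert val_2._hidden_params["cache_hit"] is True  # every element came from cache
```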
@@ -866,7 +869,7 @@ async def test_redis_cache_cluster_init_unit_test():
     from redis.asyncio import RedisCluster as AsyncRedisCluster
     from redis.cluster import RedisCluster

-    from litellm.caching import RedisCache
+    from litellm.caching.caching import RedisCache

     litellm.set_verbose = True

@@ -900,7 +903,7 @@ async def test_redis_cache_cluster_init_with_env_vars_unit_test():
     from redis.asyncio import RedisCluster as AsyncRedisCluster
     from redis.cluster import RedisCluster

-    from litellm.caching import RedisCache
+    from litellm.caching.caching import RedisCache

     litellm.set_verbose = True

@@ -1554,7 +1557,7 @@ def test_custom_redis_cache_params():


 def test_get_cache_key():
-    from litellm.caching import Cache
+    from litellm.caching.caching import Cache

     try:
         print("Testing get_cache_key")
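`test_get_cache_key` keeps its behaviour; only the import moves. What it exercises, as a hedged sketch (the argument names are typical litellm call kwargs; the key format itself is an internal detail):

```python
from litellm.caching.caching import Cache

cache = Cache()
messages = [{"role": "user", "content": "hi"}]

key_1 = cache.get_cache_key(model="gpt-3.5-turbo", messages=messages)
key_2 = cache.get_cache_key(model="gpt-3.5-turbo", messages=messages)
assert key_1 == key_2  # identical calls must map to the same cache key
```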
@@ -1989,7 +1992,7 @@ async def test_cache_default_off_acompletion():

     verbose_logger.setLevel(logging.DEBUG)

-    from litellm.caching import CacheMode
+    from litellm.caching.caching import CacheMode

     random_number = random.randint(
         1, 100000

@@ -2072,7 +2075,7 @@ async def test_dual_cache_uses_redis():
     - Assert that value from redis is used
     """
     litellm.set_verbose = True
-    from litellm.caching import DualCache, RedisCache
+    from litellm.caching.caching import DualCache, RedisCache

     current_usage = uuid.uuid4()

@@ -2095,7 +2098,7 @@ async def test_proxy_logging_setup():
     """
     Assert always_read_redis is True when used by internal usage cache
     """
-    from litellm.caching import DualCache
+    from litellm.caching.caching import DualCache
     from litellm.proxy.utils import ProxyLogging

     pl_obj = ProxyLogging(user_api_key_cache=DualCache())
@@ -2165,7 +2168,7 @@ async def test_redis_proxy_batch_redis_get_cache():
     - make 2nd call -> expect hit
     """

-    from litellm.caching import Cache, DualCache
+    from litellm.caching.caching import Cache, DualCache
     from litellm.proxy._types import UserAPIKeyAuth
     from litellm.proxy.hooks.batch_redis_get import _PROXY_BatchRedisRequests

@@ -15,7 +15,7 @@ sys.path.insert(
 import pytest
 import litellm
 from litellm import embedding, completion, Router
-from litellm.caching import Cache
+from litellm.caching.caching import Cache

 messages = [{"role": "user", "content": f"who is ishaan {time.time()}"}]
@@ -151,7 +151,7 @@ async def test_datadog_log_redis_failures():
     Test that poorly configured Redis is logged as Warning on DataDog
     """
     try:
-        from litellm.caching import Cache
+        from litellm.caching.caching import Cache
         from litellm.integrations.datadog.datadog import DataDogLogger

         litellm.cache = Cache(

@@ -24,7 +24,7 @@ import pytest
 from fastapi import Request

 import litellm
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import LiteLLM_JWTAuth, LiteLLM_UserTable, LiteLLMRoutes
 from litellm.proxy.auth.handle_jwt import JWTHandler
 from litellm.proxy.management_endpoints.team_endpoints import new_team

@@ -89,7 +89,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)
 from starlette.datastructures import URL

-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import (
     DynamoDBArgs,
     GenerateKeyRequest,

@@ -1444,7 +1444,7 @@ def test_call_with_key_over_budget(prisma_client):

     # update spend using track_cost callback, make 2nd request, it should fail
     from litellm import Choices, Message, ModelResponse, Usage
-    from litellm.caching import Cache
+    from litellm.caching.caching import Cache
     from litellm.proxy.proxy_server import (
         _PROXY_track_cost_callback as track_cost_callback,
     )

@@ -1564,7 +1564,7 @@ def test_call_with_key_over_budget_no_cache(prisma_client):
     setattr(litellm.proxy.proxy_server, "proxy_batch_write_at", 1)

     from litellm import Choices, Message, ModelResponse, Usage
-    from litellm.caching import Cache
+    from litellm.caching.caching import Cache

     litellm.cache = Cache()
     import time

@@ -1685,7 +1685,7 @@ def test_call_with_key_over_model_budget(prisma_client):

     # update spend using track_cost callback, make 2nd request, it should fail
     from litellm import Choices, Message, ModelResponse, Usage
-    from litellm.caching import Cache
+    from litellm.caching.caching import Cache
     from litellm.proxy.proxy_server import (
         _PROXY_track_cost_callback as track_cost_callback,
     )
@@ -25,7 +25,7 @@ import pytest
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.guardrails.guardrail_hooks.lakera_ai import lakeraAI_Moderation
 from litellm.proxy.proxy_server import embeddings

@@ -20,7 +20,7 @@ import pytest
 import litellm
 from litellm import Router
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.router_strategy.least_busy import LeastBusyLoggingHandler

 ### UNIT TESTS FOR LEAST BUSY LOGGING ###

@@ -20,7 +20,7 @@ from litellm.proxy.enterprise.enterprise_hooks.llm_guard import _ENTERPRISE_LLMG
 from litellm import Router, mock_completion
 from litellm.proxy.utils import ProxyLogging, hash_token
 from litellm.proxy._types import UserAPIKeyAuth
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache

 ### UNIT TESTS FOR LLM GUARD ###

@@ -10,7 +10,7 @@
 # import asyncio
 # from litellm import Router, Timeout
 # import time
-# from litellm.caching import Cache
+# from litellm.caching.caching import Cache
 # import litellm

 # litellm.cache = Cache(
@@ -15,7 +15,7 @@ sys.path.insert(
 import pytest
 from litellm import Router
 from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache

 ### UNIT TESTS FOR cost ROUTING ###

@@ -22,7 +22,7 @@ import pytest
 import litellm
 from litellm import Router
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler

 ### UNIT TESTS FOR LATENCY ROUTING ###

@@ -19,7 +19,7 @@
 # from litellm import Router
 # from litellm.proxy.utils import ProxyLogging, hash_token
 # from litellm.proxy._types import UserAPIKeyAuth
-# from litellm.caching import DualCache, RedisCache
+# from litellm.caching.caching import DualCache, RedisCache
 # from litellm.proxy.hooks.tpm_rpm_limiter import _PROXY_MaxTPMRPMLimiter
 # from datetime import datetime

@@ -22,7 +22,7 @@ from litellm.proxy.enterprise.enterprise_hooks.openai_moderation import (
 from litellm import Router, mock_completion
 from litellm.proxy.utils import ProxyLogging, hash_token
 from litellm.proxy._types import UserAPIKeyAuth
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache

 ### UNIT TESTS FOR OpenAI Moderation ###

@@ -23,7 +23,7 @@ import pytest
 import litellm
 from litellm import Router
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.hooks.parallel_request_limiter import (
     _PROXY_MaxParallelRequestsHandler as MaxParallelRequestsHandler,
@@ -22,7 +22,7 @@ import pytest
 import litellm
 from litellm import Router, mock_completion
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.hooks.presidio_pii_masking import _OPTIONAL_PresidioPIIMasking
 from litellm.proxy.utils import ProxyLogging

@@ -67,7 +67,7 @@ async def test_completion_with_caching_bad_call():
     litellm.set_verbose = True

     try:
-        from litellm.caching import RedisCache
+        from litellm.caching.caching import RedisCache

         litellm.service_callback = ["prometheus_system"]
         sl = ServiceLogging(mock_testing=True)

@@ -20,7 +20,7 @@ from litellm.proxy.hooks.prompt_injection_detection import (
 from litellm import Router, mock_completion
 from litellm.proxy.utils import ProxyLogging
 from litellm.proxy._types import UserAPIKeyAuth, LiteLLMPromptInjectionParams
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache


 @pytest.mark.asyncio

@@ -31,7 +31,7 @@ from starlette.datastructures import URL
 import litellm
 from litellm import Router, mock_completion
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.enterprise.enterprise_hooks.secret_detection import (
@@ -745,7 +745,7 @@ async def test_team_update_redis():
     """
     Tests if team update, updates the redis cache if set
     """
-    from litellm.caching import DualCache, RedisCache
+    from litellm.caching.caching import DualCache, RedisCache
     from litellm.proxy._types import LiteLLM_TeamTableCachedObj
     from litellm.proxy.auth.auth_checks import _cache_team_object

@@ -775,7 +775,7 @@ async def test_get_team_redis(client_no_auth):
     """
     Tests if get_team_object gets value from redis cache, if set
     """
-    from litellm.caching import DualCache, RedisCache
+    from litellm.caching.caching import DualCache, RedisCache
     from litellm.proxy.auth.auth_checks import get_team_object

     proxy_logging_obj: ProxyLogging = getattr(

@@ -26,7 +26,7 @@ from starlette.datastructures import URL

 import litellm
 from litellm import Router, mock_completion
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.enterprise.enterprise_hooks.secret_detection import (

@@ -3128,7 +3128,7 @@ async def test_azure_astreaming_and_function_calling():
             "content": f"What is the weather like in Boston? {uuid.uuid4()}",
         }
     ]
-    from litellm.caching import Cache
+    from litellm.caching.caching import Cache

     litellm.cache = Cache(
         type="redis",

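The hunk above cuts off mid-call; a minimal sketch of a Redis-backed cache configured through the new import path, where the host, port, and password values are placeholder assumptions rather than values taken from the diff:

```python
import litellm
from litellm.caching.caching import Cache

# Redis-backed response cache; connection details are placeholders for illustration.
litellm.cache = Cache(
    type="redis",
    host="localhost",
    port="6379",
    password="my-redis-password",
)
```
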
@@ -23,7 +23,7 @@ import pytest

 import litellm
 from litellm import Router
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.router_strategy.lowest_tpm_rpm_v2 import (
     LowestTPMLoggingHandler_v2 as LowestTPMLoggingHandler,
 )

@@ -27,7 +27,7 @@ import pytest
 import litellm
 from litellm import Router, mock_completion
 from litellm._logging import verbose_proxy_logger
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.management_endpoints.internal_user_endpoints import (
     new_user,

@@ -53,7 +53,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)

 from starlette.datastructures import URL

-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import (
     BlockUsers,
     DynamoDBArgs,

@@ -157,7 +157,7 @@ async def test_transcription_on_router():
 @pytest.mark.asyncio()
 async def test_transcription_caching():
     import litellm
-    from litellm.caching import Cache
+    from litellm.caching.caching import Cache

     litellm.set_verbose = True
     litellm.cache = Cache()

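The test above turns on the default in-memory cache with `Cache()`; a short sketch of a request pair that would exercise such a cache, where the model name and prompt are assumptions for illustration:

```python
import litellm
from litellm.caching.caching import Cache

litellm.cache = Cache()  # in-memory cache, as in the test above

# Two identical requests share one cache key; the second is served from the cache.
kwargs = dict(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "What time is it in Boston?"}],
    caching=True,
)
first = litellm.completion(**kwargs)
second = litellm.completion(**kwargs)
```
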
@@ -71,7 +71,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)

 from starlette.datastructures import URL

-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import (
     DynamoDBArgs,
     GenerateKeyRequest,

@@ -78,7 +78,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)

 from starlette.datastructures import URL

-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import *

 proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())

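Several of these proxy test modules build their `ProxyLogging` object around a `DualCache`; a rough sketch of how that cache is typically read and written, assuming the synchronous `set_cache`/`get_cache` helpers keep their key/value keyword arguments:

```python
from litellm.caching.caching import DualCache

user_api_key_cache = DualCache()

# Writes land in the in-memory layer (and in Redis, when one is configured).
user_api_key_cache.set_cache(key="user_api_key:sk-1234", value={"spend": 0.0})

# Reads check the in-memory layer first and fall back to Redis on a miss.
cached_value = user_api_key_cache.get_cache(key="user_api_key:sk-1234")
print(cached_value)
```
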
@@ -17,7 +17,7 @@ from litellm.proxy._types import LitellmUserRoles
 import os
 import jwt
 import time
-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache

 proxy_logging_obj = ProxyLogging(user_api_key_cache=DualCache())

@@ -85,7 +85,7 @@ verbose_proxy_logger.setLevel(level=logging.DEBUG)

 from starlette.datastructures import URL

-from litellm.caching import DualCache
+from litellm.caching.caching import DualCache
 from litellm.proxy._types import (
     DynamoDBArgs,
     GenerateKeyRequest,