Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 03:04:13 +00:00
[Feat] Improve OTEL Tracking - Require all Redis Cache reads to be logged on OTEL (#5881)
* fix use previous internal usage caching logic
* fix test_dual_cache_uses_redis
* redis track event_metadata in service logging
* show otel error on _get_parent_otel_span_from_kwargs (see the sketch below)
* track parent otel span on internal usage cache
* update_request_status
* fix internal usage cache
* fix linting
* fix test internal usage cache
* fix linting error
* show event metadata in redis set
* fix test_get_team_redis
* fix test_get_team_redis
* test_proxy_logging_setup
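One bullet above touches _get_parent_otel_span_from_kwargs, the helper that recovers the request's parent OTEL span from logging kwargs so cache reads can be attributed to it. Below is a minimal hypothetical sketch of what such a helper might do; the metadata key name ("litellm_parent_otel_span") and the error-reporting detail are assumptions, not confirmed litellm internals:

from typing import Any, Optional


def get_parent_otel_span_from_kwargs_sketch(
    kwargs: Optional[dict] = None,
) -> Optional[Any]:
    """Hypothetical stand-in for litellm's _get_parent_otel_span_from_kwargs."""
    try:
        if not kwargs:
            return None
        # Assumption: the proxy stashes the span in request metadata under
        # a key like "litellm_parent_otel_span".
        litellm_params = kwargs.get("litellm_params") or {}
        metadata = kwargs.get("metadata") or litellm_params.get("metadata") or {}
        return metadata.get("litellm_parent_otel_span")
    except Exception as e:
        # Per the commit message ("show otel error on
        # _get_parent_otel_span_from_kwargs"), failures here are surfaced
        # in logs rather than silently swallowed.
        print(f"Error getting parent otel span from kwargs: {e}")
        return None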
parent 4ec4d02474, commit 7cbcf538c6
9 changed files with 243 additions and 79 deletions
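The hunks below repeat one mechanical change: every test that previously handed a bare DualCache to MaxParallelRequestsHandler now wraps it in InternalUsageCache, and assertions reach the in-memory store through the new .dual_cache attribute. Here is a minimal self-contained sketch of that wrapper pattern; the method signatures and the parent_otel_span parameter are illustrative assumptions, and DualCacheStub stands in for litellm's real DualCache:

import asyncio
from typing import Any, Optional


class DualCacheStub:
    """Stand-in for litellm's DualCache (in-memory layer backed by Redis)."""

    def __init__(self) -> None:
        self.cache_dict: dict[str, Any] = {}

    async def async_get_cache(self, key: str, **kwargs: Any) -> Optional[Any]:
        return self.cache_dict.get(key)

    async def async_set_cache(self, key: str, value: Any, **kwargs: Any) -> None:
        self.cache_dict[key] = value


class InternalUsageCache:
    """Hypothetical sketch of litellm.proxy.utils.InternalUsageCache."""

    def __init__(self, dual_cache: DualCacheStub) -> None:
        # Exposed as .dual_cache: the updated tests below reach through it,
        # e.g. internal_usage_cache.dual_cache.in_memory_cache.cache_dict.
        self.dual_cache = dual_cache

    async def async_get_cache(
        self, key: str, parent_otel_span: Optional[Any] = None, **kwargs: Any
    ) -> Optional[Any]:
        # The seam this PR adds: every read forwards the parent span so the
        # underlying Redis lookup can be logged as a child span on OTEL.
        return await self.dual_cache.async_get_cache(
            key=key, parent_otel_span=parent_otel_span, **kwargs
        )

    async def async_set_cache(
        self, key: str, value: Any, parent_otel_span: Optional[Any] = None, **kwargs: Any
    ) -> None:
        # Writes carry the span too ("show event metadata in redis set").
        await self.dual_cache.async_set_cache(
            key=key, value=value, parent_otel_span=parent_otel_span, **kwargs
        )


async def _demo() -> None:
    cache = InternalUsageCache(dual_cache=DualCacheStub())
    await cache.async_set_cache("k", 1, parent_otel_span=None)
    assert await cache.async_get_cache("k", parent_otel_span=None) == 1


asyncio.run(_demo())

The design point is that every cache access now passes through a single wrapper where the parent span can be attached, which is what lets Redis cache reads show up on OTEL traces.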
@@ -28,7 +28,7 @@ from litellm.proxy._types import UserAPIKeyAuth
 from litellm.proxy.hooks.parallel_request_limiter import (
     _PROXY_MaxParallelRequestsHandler as MaxParallelRequestsHandler,
 )
-from litellm.proxy.utils import ProxyLogging, hash_token
+from litellm.proxy.utils import InternalUsageCache, ProxyLogging, hash_token
 
 ## On Request received
 ## On Request success
@@ -48,7 +48,7 @@ async def test_global_max_parallel_requests():
     user_api_key_dict = UserAPIKeyAuth(api_key=_api_key, max_parallel_requests=100)
     local_cache = DualCache()
     parallel_request_handler = MaxParallelRequestsHandler(
-        internal_usage_cache=local_cache
+        internal_usage_cache=InternalUsageCache(dual_cache=local_cache)
     )
 
     for _ in range(3):
@@ -78,7 +78,7 @@ async def test_pre_call_hook():
     user_api_key_dict = UserAPIKeyAuth(api_key=_api_key, max_parallel_requests=1)
     local_cache = DualCache()
     parallel_request_handler = MaxParallelRequestsHandler(
-        internal_usage_cache=local_cache
+        internal_usage_cache=InternalUsageCache(dual_cache=local_cache)
     )
 
     await parallel_request_handler.async_pre_call_hook(
@@ -115,7 +115,7 @@ async def test_pre_call_hook_rpm_limits():
     )
     local_cache = DualCache()
     parallel_request_handler = MaxParallelRequestsHandler(
-        internal_usage_cache=local_cache
+        internal_usage_cache=InternalUsageCache(dual_cache=local_cache)
     )
 
     await parallel_request_handler.async_pre_call_hook(
@@ -157,7 +157,7 @@ async def test_pre_call_hook_rpm_limits_retry_after():
     )
     local_cache = DualCache()
     parallel_request_handler = MaxParallelRequestsHandler(
-        internal_usage_cache=local_cache
+        internal_usage_cache=InternalUsageCache(dual_cache=local_cache)
     )
 
     await parallel_request_handler.async_pre_call_hook(
@@ -208,7 +208,7 @@ async def test_pre_call_hook_team_rpm_limits():
     )
     local_cache = DualCache()
     parallel_request_handler = MaxParallelRequestsHandler(
-        internal_usage_cache=local_cache
+        internal_usage_cache=InternalUsageCache(dual_cache=local_cache)
     )
 
     await parallel_request_handler.async_pre_call_hook(
@@ -256,7 +256,7 @@ async def test_pre_call_hook_tpm_limits():
     )
     local_cache = DualCache()
     parallel_request_handler = MaxParallelRequestsHandler(
-        internal_usage_cache=local_cache
+        internal_usage_cache=InternalUsageCache(dual_cache=local_cache)
     )
 
     await parallel_request_handler.async_pre_call_hook(
@@ -308,7 +308,7 @@ async def test_pre_call_hook_user_tpm_limits():
     print("dict user", res)
 
     parallel_request_handler = MaxParallelRequestsHandler(
-        internal_usage_cache=local_cache
+        internal_usage_cache=InternalUsageCache(dual_cache=local_cache)
     )
 
     await parallel_request_handler.async_pre_call_hook(
@@ -353,7 +353,7 @@ async def test_success_call_hook():
     user_api_key_dict = UserAPIKeyAuth(api_key=_api_key, max_parallel_requests=1)
     local_cache = DualCache()
     parallel_request_handler = MaxParallelRequestsHandler(
-        internal_usage_cache=local_cache
+        internal_usage_cache=InternalUsageCache(dual_cache=local_cache)
     )
 
     await parallel_request_handler.async_pre_call_hook(
@@ -397,7 +397,7 @@ async def test_failure_call_hook():
     user_api_key_dict = UserAPIKeyAuth(api_key=_api_key, max_parallel_requests=1)
     local_cache = DualCache()
     parallel_request_handler = MaxParallelRequestsHandler(
-        internal_usage_cache=local_cache
+        internal_usage_cache=InternalUsageCache(dual_cache=local_cache)
     )
 
     await parallel_request_handler.async_pre_call_hook(
@@ -975,7 +975,7 @@ async def test_bad_router_tpm_limit_per_model():
 
     print(
         "internal usage cache: ",
-        parallel_request_handler.internal_usage_cache.in_memory_cache.cache_dict,
+        parallel_request_handler.internal_usage_cache.dual_cache.in_memory_cache.cache_dict,
     )
 
     assert (
@@ -1161,7 +1161,7 @@ async def test_pre_call_hook_tpm_limits_per_model():
 
     print(
         "internal usage cache: ",
-        parallel_request_handler.internal_usage_cache.in_memory_cache.cache_dict,
+        parallel_request_handler.internal_usage_cache.dual_cache.in_memory_cache.cache_dict,
     )
 
     assert (