[Feat] Improve OTEL Tracking - Require all Redis Cache reads to be logged on OTEL (#5881)

* fix: use previous internal usage caching logic

* fix test_dual_cache_uses_redis

* redis track event_metadata in service logging

* show otel error on _get_parent_otel_span_from_kwargs

* track parent otel span on internal usage cache

* update_request_status

* fix internal usage cache

* fix linting

* fix test internal usage cache

* fix linting error

* show event metadata in redis set

* fix test_get_team_redis

* fix test_get_team_redis

* test_proxy_logging_setup
This commit is contained in:
Ishaan Jaff 2024-09-25 10:57:08 -07:00 committed by GitHub
parent 4ec4d02474
commit 7cbcf538c6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 243 additions and 79 deletions

View file

@ -28,7 +28,7 @@ from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.hooks.parallel_request_limiter import (
_PROXY_MaxParallelRequestsHandler as MaxParallelRequestsHandler,
)
from litellm.proxy.utils import ProxyLogging, hash_token
from litellm.proxy.utils import InternalUsageCache, ProxyLogging, hash_token
## On Request received
## On Request success
@ -48,7 +48,7 @@ async def test_global_max_parallel_requests():
user_api_key_dict = UserAPIKeyAuth(api_key=_api_key, max_parallel_requests=100)
local_cache = DualCache()
parallel_request_handler = MaxParallelRequestsHandler(
internal_usage_cache=local_cache
internal_usage_cache=InternalUsageCache(dual_cache=local_cache)
)
for _ in range(3):
@ -78,7 +78,7 @@ async def test_pre_call_hook():
user_api_key_dict = UserAPIKeyAuth(api_key=_api_key, max_parallel_requests=1)
local_cache = DualCache()
parallel_request_handler = MaxParallelRequestsHandler(
internal_usage_cache=local_cache
internal_usage_cache=InternalUsageCache(dual_cache=local_cache)
)
await parallel_request_handler.async_pre_call_hook(
@ -115,7 +115,7 @@ async def test_pre_call_hook_rpm_limits():
)
local_cache = DualCache()
parallel_request_handler = MaxParallelRequestsHandler(
internal_usage_cache=local_cache
internal_usage_cache=InternalUsageCache(dual_cache=local_cache)
)
await parallel_request_handler.async_pre_call_hook(
@ -157,7 +157,7 @@ async def test_pre_call_hook_rpm_limits_retry_after():
)
local_cache = DualCache()
parallel_request_handler = MaxParallelRequestsHandler(
internal_usage_cache=local_cache
internal_usage_cache=InternalUsageCache(dual_cache=local_cache)
)
await parallel_request_handler.async_pre_call_hook(
@ -208,7 +208,7 @@ async def test_pre_call_hook_team_rpm_limits():
)
local_cache = DualCache()
parallel_request_handler = MaxParallelRequestsHandler(
internal_usage_cache=local_cache
internal_usage_cache=InternalUsageCache(dual_cache=local_cache)
)
await parallel_request_handler.async_pre_call_hook(
@ -256,7 +256,7 @@ async def test_pre_call_hook_tpm_limits():
)
local_cache = DualCache()
parallel_request_handler = MaxParallelRequestsHandler(
internal_usage_cache=local_cache
internal_usage_cache=InternalUsageCache(dual_cache=local_cache)
)
await parallel_request_handler.async_pre_call_hook(
@ -308,7 +308,7 @@ async def test_pre_call_hook_user_tpm_limits():
print("dict user", res)
parallel_request_handler = MaxParallelRequestsHandler(
internal_usage_cache=local_cache
internal_usage_cache=InternalUsageCache(dual_cache=local_cache)
)
await parallel_request_handler.async_pre_call_hook(
@ -353,7 +353,7 @@ async def test_success_call_hook():
user_api_key_dict = UserAPIKeyAuth(api_key=_api_key, max_parallel_requests=1)
local_cache = DualCache()
parallel_request_handler = MaxParallelRequestsHandler(
internal_usage_cache=local_cache
internal_usage_cache=InternalUsageCache(dual_cache=local_cache)
)
await parallel_request_handler.async_pre_call_hook(
@ -397,7 +397,7 @@ async def test_failure_call_hook():
user_api_key_dict = UserAPIKeyAuth(api_key=_api_key, max_parallel_requests=1)
local_cache = DualCache()
parallel_request_handler = MaxParallelRequestsHandler(
internal_usage_cache=local_cache
internal_usage_cache=InternalUsageCache(dual_cache=local_cache)
)
await parallel_request_handler.async_pre_call_hook(
@ -975,7 +975,7 @@ async def test_bad_router_tpm_limit_per_model():
print(
"internal usage cache: ",
parallel_request_handler.internal_usage_cache.in_memory_cache.cache_dict,
parallel_request_handler.internal_usage_cache.dual_cache.in_memory_cache.cache_dict,
)
assert (
@ -1161,7 +1161,7 @@ async def test_pre_call_hook_tpm_limits_per_model():
print(
"internal usage cache: ",
parallel_request_handler.internal_usage_cache.in_memory_cache.cache_dict,
parallel_request_handler.internal_usage_cache.dual_cache.in_memory_cache.cache_dict,
)
assert (