forked from phoenix/litellm-mirror
use correct enum for InternalUsageCache
This commit is contained in:
parent
16c0307eab
commit
86ebdc611f
2 changed files with 15 additions and 1 deletion
|
@@ -2454,6 +2454,17 @@
|
||||||
"mode": "chat",
|
"mode": "chat",
|
||||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models"
|
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models"
|
||||||
},
|
},
|
||||||
|
"vertex_ai/meta/llama-3.2-90b-vision-instruct-maas": {
|
||||||
|
"max_tokens": 8192,
|
||||||
|
"max_input_tokens": 128000,
|
||||||
|
"max_output_tokens": 8192,
|
||||||
|
"input_cost_per_token": 0.0,
|
||||||
|
"output_cost_per_token": 0.0,
|
||||||
|
"litellm_provider": "vertex_ai-llama_models",
|
||||||
|
"mode": "chat",
|
||||||
|
"supports_system_messages": true,
|
||||||
|
"source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas"
|
||||||
|
},
|
||||||
"vertex_ai/mistral-large@latest": {
|
"vertex_ai/mistral-large@latest": {
|
||||||
"max_tokens": 8191,
|
"max_tokens": 8191,
|
||||||
"max_input_tokens": 128000,
|
"max_input_tokens": 128000,
|
||||||
|
|
|
@@ -70,6 +70,7 @@ from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
|
||||||
from litellm.proxy.hooks.parallel_request_limiter import (
|
from litellm.proxy.hooks.parallel_request_limiter import (
|
||||||
_PROXY_MaxParallelRequestsHandler,
|
_PROXY_MaxParallelRequestsHandler,
|
||||||
)
|
)
|
||||||
|
from litellm.proxy.proxy_server import UserAPIKeyCacheTTLEnum
|
||||||
from litellm.types.utils import CallTypes, LoggedLiteLLMParams
|
from litellm.types.utils import CallTypes, LoggedLiteLLMParams
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
@@ -301,7 +302,9 @@ class ProxyLogging:
|
||||||
self.call_details: dict = {}
|
self.call_details: dict = {}
|
||||||
self.call_details["user_api_key_cache"] = user_api_key_cache
|
self.call_details["user_api_key_cache"] = user_api_key_cache
|
||||||
self.internal_usage_cache: InternalUsageCache = InternalUsageCache(
|
self.internal_usage_cache: InternalUsageCache = InternalUsageCache(
|
||||||
dual_cache=DualCache(default_in_memory_ttl=1) # ping redis cache every 1s
|
dual_cache=DualCache(
|
||||||
|
default_in_memory_ttl=UserAPIKeyCacheTTLEnum.in_memory_cache_ttl.value
|
||||||
|
) # ping redis cache every 1s
|
||||||
)
|
)
|
||||||
self.max_parallel_request_limiter = _PROXY_MaxParallelRequestsHandler(
|
self.max_parallel_request_limiter = _PROXY_MaxParallelRequestsHandler(
|
||||||
self.internal_usage_cache
|
self.internal_usage_cache
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue