Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 18:54:30 +00:00
ResponsesApiDeploymentCheck
commit cddbac5b28 (parent b8838004ca)
1 changed file with 33 additions and 5 deletions
litellm/router_utils/pre_call_checks/responses_api_deployment_check.py

@@ -2,7 +2,7 @@
 If previous_response_id is provided, route to the deployment that returned the previous response
 """
 
-from typing import List, Optional, cast
+from typing import List, Optional
 
 from litellm import verbose_logger
 from litellm.caching.dual_cache import DualCache
@@ -32,9 +32,15 @@ class ResponsesApiDeploymentCheck(CustomLogger):
         if previous_response_id is None:
             return healthy_deployments
 
-        # for deployment in healthy_deployments:
-        #     if deployment["model_info"]["id"] == model_id:
-        #         return [deployment]
+        model_id = await self.async_get_response_id_from_cache(
+            response_id=previous_response_id,
+        )
+        if model_id is None:
+            return healthy_deployments
+
+        for deployment in healthy_deployments:
+            if deployment["model_info"]["id"] == model_id:
+                return [deployment]
 
         return healthy_deployments
 
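Taken together, this routing hunk reads: look up which deployment produced previous_response_id and, if it is known and still healthy, pin the request to that deployment; otherwise fall back to the full healthy list. A minimal self-contained sketch of that logic (the dict-backed cache and the function names here are illustrative stand-ins, not litellm's actual classes):

import asyncio
from typing import List, Optional

# Hypothetical stand-in for the DualCache lookup; maps response ids to the
# model id of the deployment that produced them.
RESPONSE_ID_TO_MODEL_ID = {"resp_abc123": "model-2"}

async def get_model_id_from_cache(response_id: str) -> Optional[str]:
    return RESPONSE_ID_TO_MODEL_ID.get(response_id)

async def filter_deployments(
    healthy_deployments: List[dict],
    previous_response_id: Optional[str],
) -> List[dict]:
    # No previous_response_id -> any healthy deployment may serve the call.
    if previous_response_id is None:
        return healthy_deployments
    model_id = await get_model_id_from_cache(previous_response_id)
    # Cache miss -> fall back to all healthy deployments rather than failing.
    if model_id is None:
        return healthy_deployments
    # Pin the call to the deployment that returned the previous response.
    for deployment in healthy_deployments:
        if deployment["model_info"]["id"] == model_id:
            return [deployment]
    return healthy_deployments

async def main() -> None:
    deployments = [
        {"model_info": {"id": "model-1"}},
        {"model_info": {"id": "model-2"}},
    ]
    print(await filter_deployments(deployments, "resp_abc123"))   # pinned to model-2
    print(await filter_deployments(deployments, "resp_missing"))  # falls back to both

asyncio.run(main())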
@@ -60,6 +66,17 @@ class ResponsesApiDeploymentCheck(CustomLogger):
         response_id = getattr(response_obj, "id", None)
         model_id = standard_logging_object["model_id"]
 
+        if response_id is None or model_id is None:
+            verbose_logger.debug(
+                "litellm.router_utils.pre_call_checks.responses_api_deployment_check: skipping adding response_id to cache, RESPONSE ID OR MODEL ID IS NONE"
+            )
+            return
+
+        await self.async_add_response_id_to_cache(
+            response_id=response_id,
+            model_id=model_id,
+        )
+
         return
 
     async def async_add_response_id_to_cache(
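The success-hook half of the change only writes to the cache when both ids are present. A dict-backed sketch of that write path (CACHE, add_response_id_to_cache, and the prefix value are assumed stand-ins, not litellm's real names):

import asyncio
from typing import Dict, Optional

CACHE_KEY_PREFIX = "responses_api"  # assumed value of RESPONSES_API_RESPONSE_MODEL_ID_CACHE_KEY
CACHE: Dict[str, dict] = {}  # stands in for DualCache

def get_cache_key_for_response_id(response_id: str) -> str:
    # Same namespacing scheme as the commit: one key per response id.
    return f"{CACHE_KEY_PREFIX}:{response_id}"

async def add_response_id_to_cache(response_id: Optional[str], model_id: Optional[str]) -> None:
    # Guard mirrors the hunk: skip the write when either id is missing.
    if response_id is None or model_id is None:
        print("skipping cache write: response_id or model_id is None")
        return
    CACHE[get_cache_key_for_response_id(response_id)] = {
        "response_id": response_id,
        "model_id": model_id,
    }

asyncio.run(add_response_id_to_cache("resp_abc123", "model-2"))
asyncio.run(add_response_id_to_cache(None, "model-2"))  # skipped by the guard
print(CACHE)

Note the related key change in the final hunk below: the old code wrote every entry under the single shared RESPONSES_API_RESPONSE_MODEL_ID_CACHE_KEY, so each response would have clobbered the last; suffixing the response id gives each response its own cache entry.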
@@ -68,9 +85,20 @@ class ResponsesApiDeploymentCheck(CustomLogger):
         model_id: str,
     ):
         await self.cache.async_set_cache(
-            key=self.RESPONSES_API_RESPONSE_MODEL_ID_CACHE_KEY,
+            key=self.get_cache_key_for_response_id(response_id),
             value={
                 "response_id": response_id,
                 "model_id": model_id,
             },
         )
+
+    async def async_get_response_id_from_cache(self, response_id: str) -> Optional[str]:
+        cache_value = await self.cache.async_get_cache(
+            key=self.get_cache_key_for_response_id(response_id),
+        )
+        if cache_value is None:
+            return None
+        return str(cache_value)
+
+    def get_cache_key_for_response_id(self, response_id: str) -> str:
+        return f"{self.RESPONSES_API_RESPONSE_MODEL_ID_CACHE_KEY}:{response_id}"
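One caveat worth flagging: async_add_response_id_to_cache stores a dict ({"response_id": ..., "model_id": ...}), but async_get_response_id_from_cache returns str(cache_value), which on a cache hit stringifies the whole dict rather than extracting the model id, so the comparison against deployment["model_info"]["id"] in the routing hunk would seemingly never match. A more defensive decode might look like the sketch below (an assumption about the intended behavior, not what this commit ships):

from typing import Any, Optional

def decode_cached_model_id(cache_value: Any) -> Optional[str]:
    """Extract the model id from a cached Responses API entry (hypothetical helper)."""
    if cache_value is None:
        return None
    # The commit writes a dict; pull model_id out instead of stringifying it.
    if isinstance(cache_value, dict):
        return cache_value.get("model_id")
    # Fall back for cache backends that hand back a plain string.
    return str(cache_value)

# Example: the shape written by async_add_response_id_to_cache.
print(decode_cached_model_id({"response_id": "resp_abc123", "model_id": "model-2"}))  # model-2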