mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00

simplify ResponsesApiDeploymentCheck

commit a582a067f4 (parent 39610d4888)
5 changed files with 102 additions and 73 deletions
@@ -116,6 +116,13 @@ async def aresponses(
             response = await init_response
         else:
             response = init_response
+
+        # Update the responses_api_response_id with the model_id
+        if isinstance(response, ResponsesAPIResponse):
+            response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
+                responses_api_response=response,
+                kwargs=kwargs,
+            )
         return response
     except Exception as e:
         raise litellm.exception_type(

@@ -248,6 +255,13 @@ def responses(
             ),
         )
 
+        # Update the responses_api_response_id with the model_id
+        if isinstance(response, ResponsesAPIResponse):
+            response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
+                responses_api_response=response,
+                kwargs=kwargs,
+            )
+
         return response
     except Exception as e:
         raise litellm.exception_type(

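The two hunks above only rewrite the id when the call produced a concrete ResponsesAPIResponse (streaming iterators pass through untouched), and the model_id they fold in is read from the router metadata attached to kwargs. The snippet below is a rough, stand-alone sketch of that data flow, with a plain string standing in for litellm's response object; the format string mirrors the SpecialEnums value added at the end of this commit, and the kwargs shape follows the helper introduced in the utils.py hunk further down.

import base64
from typing import Any, Dict, Optional

# Mirrors SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR (added later in this commit)
RESPONSE_COMPLETE_STR = "litellm:model_id:{};response_id:{}"


def wrap_response_id(response_id: str, kwargs: Dict[str, Any]) -> str:
    """Sketch: fold the routed deployment's model_id into the returned response id."""
    litellm_metadata: Dict[str, Any] = kwargs.get("litellm_metadata", {}) or {}
    model_info: Dict[str, Any] = litellm_metadata.get("model_info", {}) or {}
    model_id: Optional[str] = model_info.get("id")
    if model_id is None:  # no deployment info attached -> leave the provider id alone
        return response_id
    assembled = RESPONSE_COMPLETE_STR.format(model_id, response_id)
    return "resp_" + base64.b64encode(assembled.encode("utf-8")).decode("utf-8")


# Example kwargs shaped like the metadata the router attaches per deployment
print(wrap_response_id("resp_abc123", {"litellm_metadata": {"model_info": {"id": "azure-eu-1"}}}))
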
@@ -1,12 +1,30 @@
-from typing import Any, Dict, Union, cast, get_type_hints
+import base64
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Coroutine,
+    Dict,
+    Optional,
+    Tuple,
+    Union,
+    cast,
+    get_type_hints,
+)
 
 import litellm
+from litellm._logging import verbose_logger
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
 from litellm.types.llms.openai import (
     ResponseAPIUsage,
     ResponsesAPIOptionalRequestParams,
+    ResponsesAPIResponse,
 )
-from litellm.types.utils import Usage
+from litellm.types.utils import SpecialEnums, Usage
+
+if TYPE_CHECKING:
+    from litellm.responses.streaming_iterator import BaseResponsesAPIStreamingIterator
+else:
+    BaseResponsesAPIStreamingIterator = Any
 
 
 class ResponsesAPIRequestUtils:

@@ -77,6 +95,66 @@ class ResponsesAPIRequestUtils:
         }
         return cast(ResponsesAPIOptionalRequestParams, filtered_params)
 
+    @staticmethod
+    def _update_responses_api_response_id_with_model_id(
+        responses_api_response: ResponsesAPIResponse,
+        kwargs: Dict[str, Any],
+    ) -> ResponsesAPIResponse:
+        """Update the responses_api_response_id with the model_id"""
+        litellm_metadata: Dict[str, Any] = kwargs.get("litellm_metadata", {}) or {}
+        model_info: Dict[str, Any] = litellm_metadata.get("model_info", {}) or {}
+        model_id = model_info.get("id")
+        updated_id = ResponsesAPIRequestUtils._build_responses_api_response_id(
+            model_id=model_id,
+            response_id=responses_api_response.id,
+        )
+        responses_api_response.id = updated_id
+        return responses_api_response
+
+    @staticmethod
+    def _build_responses_api_response_id(
+        model_id: Optional[str],
+        response_id: str,
+    ) -> str:
+        """Build the responses_api_response_id"""
+        if model_id is None:
+            return response_id
+        assembled_id: str = str(
+            SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR.value
+        ).format(model_id, response_id)
+        base64_encoded_id: str = base64.b64encode(assembled_id.encode("utf-8")).decode(
+            "utf-8"
+        )
+        return f"resp_{base64_encoded_id}"
+
+    @staticmethod
+    def _decode_responses_api_response_id(
+        response_id: str,
+    ) -> Tuple[Optional[str], str]:
+        """
+        Decode the responses_api_response_id
+
+        Returns:
+            Tuple of model_id, response_id (from upstream provider)
+        """
+        try:
+            # Remove prefix and decode
+            cleaned_id = response_id.replace("resp_", "")
+            decoded_id = base64.b64decode(cleaned_id.encode("utf-8")).decode("utf-8")
+
+            # Parse components using known prefixes
+            if ";" not in decoded_id:
+                return None, response_id
+
+            model_part, response_part = decoded_id.split(";", 1)
+            model_id = model_part.replace("litellm:model_id:", "")
+            decoded_response_id = response_part.replace("response_id:", "")
+
+            return model_id, decoded_response_id
+        except Exception as e:
+            verbose_logger.debug(f"Error decoding response_id '{response_id}': {e}")
+            return None, response_id
+
+
 class ResponseAPILoggingUtils:
     @staticmethod

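A minimal, dependency-free round-trip of the helpers above, re-implemented with only the standard library so the sketch runs on its own (function names here are illustrative, not the litellm API): an id produced by the builder decodes back to the (model_id, provider response id) pair, while an id that was never wrapped falls through unchanged.

import base64
from typing import Optional, Tuple

FMT = "litellm:model_id:{};response_id:{}"  # same shape as the new SpecialEnums value


def build_id(model_id: Optional[str], response_id: str) -> str:
    if model_id is None:
        return response_id
    assembled = FMT.format(model_id, response_id)
    return "resp_" + base64.b64encode(assembled.encode("utf-8")).decode("utf-8")


def decode_id(response_id: str) -> Tuple[Optional[str], str]:
    try:
        decoded = base64.b64decode(response_id.replace("resp_", "").encode("utf-8")).decode("utf-8")
        if ";" not in decoded:
            return None, response_id
        model_part, response_part = decoded.split(";", 1)
        return model_part.replace("litellm:model_id:", ""), response_part.replace("response_id:", "")
    except Exception:
        # not base64 / not litellm-wrapped: hand the id back untouched
        return None, response_id


wrapped = build_id("azure-eu-1", "resp_abc123")
assert decode_id(wrapped) == ("azure-eu-1", "resp_abc123")
assert decode_id("resp_not_wrapped") == (None, "resp_not_wrapped")  # plain provider ids pass through
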
@@ -769,9 +769,7 @@ class Router:
                         model_list=self.model_list,
                     )
                 elif pre_call_check == "responses_api_deployment_check":
-                    _callback = ResponsesApiDeploymentCheck(
-                        cache=self.cache,
-                    )
+                    _callback = ResponsesApiDeploymentCheck()
                 if _callback is not None:
                     litellm.logging_callback_manager.add_litellm_callback(_callback)
 
@@ -12,21 +12,12 @@ If previous_response_id is provided, route to the deployment that returned the previous response
 
 from typing import List, Optional
 
-from litellm import verbose_logger
-from litellm.caching.dual_cache import DualCache
 from litellm.integrations.custom_logger import CustomLogger, Span
+from litellm.responses.utils import ResponsesAPIRequestUtils
 from litellm.types.llms.openai import AllMessageValues
-from litellm.types.utils import CallTypes, StandardLoggingPayload
 
 
 class ResponsesApiDeploymentCheck(CustomLogger):
-    RESPONSES_API_RESPONSE_MODEL_ID_CACHE_KEY = (
-        "litellm_responses_api_response_model_id"
-    )
-
-    def __init__(self, cache: DualCache):
-        self.cache = cache
-
     async def async_filter_deployments(
         self,
         model: str,

@@ -40,8 +31,10 @@ class ResponsesApiDeploymentCheck(CustomLogger):
         if previous_response_id is None:
             return healthy_deployments
 
-        model_id = await self.async_get_response_id_from_cache(
-            response_id=previous_response_id,
+        model_id, response_id = (
+            ResponsesAPIRequestUtils._decode_responses_api_response_id(
+                response_id=previous_response_id,
+            )
         )
         if model_id is None:
             return healthy_deployments

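Roughly how the simplified check then uses the decoded value: since the deployment's model_id is now recoverable from previous_response_id itself, pinning the request is a pure lookup over the healthy deployments with no cache read. A hedged sketch, with plain dicts standing in for the router's deployment objects and an illustrative function name:

from typing import Any, Dict, List, Optional


def filter_for_previous_response(
    healthy_deployments: List[Dict[str, Any]],
    decoded_model_id: Optional[str],
) -> List[Dict[str, Any]]:
    """Sketch: route to the deployment that produced the previous response, if still healthy."""
    if decoded_model_id is None:  # nothing encoded in the id -> no restriction
        return healthy_deployments
    for deployment in healthy_deployments:
        if deployment.get("model_info", {}).get("id") == decoded_model_id:
            return [deployment]
    return healthy_deployments  # deployment gone -> fall back to normal routing


deployments = [
    {"model_name": "gpt-4o", "model_info": {"id": "azure-eu-1"}},
    {"model_name": "gpt-4o", "model_info": {"id": "azure-us-2"}},
]
assert filter_for_previous_response(deployments, "azure-us-2") == [deployments[1]]
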
@@ -51,59 +44,3 @@ class ResponsesApiDeploymentCheck(CustomLogger):
                 return [deployment]
 
         return healthy_deployments
-
-    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
-        standard_logging_object: Optional[StandardLoggingPayload] = kwargs.get(
-            "standard_logging_object", None
-        )
-
-        if standard_logging_object is None:
-            return
-
-        call_type = standard_logging_object["call_type"]
-
-        if (
-            call_type != CallTypes.responses.value
-            and call_type != CallTypes.aresponses.value
-        ):  # only use response id checks for responses api
-            verbose_logger.debug(
-                "litellm.router_utils.pre_call_checks.responses_api_deployment_check: skipping adding response_id to cache, CALL TYPE IS NOT RESPONSES"
-            )
-            return
-
-        response_id = getattr(response_obj, "id", None)
-        model_id = standard_logging_object["model_id"]
-
-        if response_id is None or model_id is None:
-            verbose_logger.debug(
-                "litellm.router_utils.pre_call_checks.responses_api_deployment_check: skipping adding response_id to cache, RESPONSE ID OR MODEL ID IS NONE"
-            )
-            return
-
-        await self.async_add_response_id_to_cache(
-            response_id=response_id,
-            model_id=model_id,
-        )
-
-        return
-
-    async def async_add_response_id_to_cache(
-        self,
-        response_id: str,
-        model_id: str,
-    ):
-        await self.cache.async_set_cache(
-            key=self.get_cache_key_for_response_id(response_id),
-            value=model_id,
-        )
-
-    async def async_get_response_id_from_cache(self, response_id: str) -> Optional[str]:
-        cache_value = await self.cache.async_get_cache(
-            key=self.get_cache_key_for_response_id(response_id),
-        )
-        if cache_value is None:
-            return None
-        return str(cache_value)
-
-    def get_cache_key_for_response_id(self, response_id: str) -> str:
-        return f"{self.RESPONSES_API_RESPONSE_MODEL_ID_CACHE_KEY}:{response_id}"

@@ -2254,6 +2254,8 @@ class SpecialEnums(Enum):
     LITELM_MANAGED_FILE_ID_PREFIX = "litellm_proxy"
     LITELLM_MANAGED_FILE_COMPLETE_STR = "litellm_proxy:{};unified_id,{}"
 
+    LITELLM_MANAGED_RESPONSE_COMPLETE_STR = "litellm:model_id:{};response_id:{}"
+
 
 LLMResponseTypes = Union[
     ModelResponse, EmbeddingResponse, ImageResponse, OpenAIFileObject
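For reference, what the new format string expands to before it is base64-encoded (a small illustrative snippet; only the enum value is taken verbatim from the diff, the model_id and response_id are made up):

LITELLM_MANAGED_RESPONSE_COMPLETE_STR = "litellm:model_id:{};response_id:{}"

assembled = LITELLM_MANAGED_RESPONSE_COMPLETE_STR.format("azure-eu-1", "resp_abc123")
print(assembled)  # litellm:model_id:azure-eu-1;response_id:resp_abc123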