mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00

simplify ResponsesApiDeploymentCheck

commit a582a067f4 (parent 39610d4888)
5 changed files with 102 additions and 73 deletions
@@ -116,6 +116,13 @@ async def aresponses(
             response = await init_response
         else:
             response = init_response
+
+        # Update the responses_api_response_id with the model_id
+        if isinstance(response, ResponsesAPIResponse):
+            response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
+                responses_api_response=response,
+                kwargs=kwargs,
+            )
         return response
     except Exception as e:
         raise litellm.exception_type(

@@ -248,6 +255,13 @@ def responses(
             ),
         )
 
+        # Update the responses_api_response_id with the model_id
+        if isinstance(response, ResponsesAPIResponse):
+            response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
+                responses_api_response=response,
+                kwargs=kwargs,
+            )
+
         return response
     except Exception as e:
         raise litellm.exception_type(

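The two hunks above only rewrite the id when the call produced a concrete ResponsesAPIResponse (streaming iterators pass through untouched), and the model_id they fold in is read from the router metadata attached to kwargs. The snippet below is a rough, stand-alone sketch of that data flow, with a plain string standing in for litellm's response object; the format string mirrors the SpecialEnums value added at the end of this commit, and the kwargs shape follows the helper introduced in the utils.py hunk further down.

import base64
from typing import Any, Dict, Optional

# Mirrors SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR (added later in this commit)
RESPONSE_COMPLETE_STR = "litellm:model_id:{};response_id:{}"


def wrap_response_id(response_id: str, kwargs: Dict[str, Any]) -> str:
    """Sketch: fold the routed deployment's model_id into the returned response id."""
    litellm_metadata: Dict[str, Any] = kwargs.get("litellm_metadata", {}) or {}
    model_info: Dict[str, Any] = litellm_metadata.get("model_info", {}) or {}
    model_id: Optional[str] = model_info.get("id")
    if model_id is None:  # no deployment info attached -> leave the provider id alone
        return response_id
    assembled = RESPONSE_COMPLETE_STR.format(model_id, response_id)
    return "resp_" + base64.b64encode(assembled.encode("utf-8")).decode("utf-8")


# Example kwargs shaped like the metadata the router attaches per deployment
print(wrap_response_id("resp_abc123", {"litellm_metadata": {"model_info": {"id": "azure-eu-1"}}}))
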
@@ -1,12 +1,30 @@
-from typing import Any, Dict, Union, cast, get_type_hints
+import base64
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Coroutine,
+    Dict,
+    Optional,
+    Tuple,
+    Union,
+    cast,
+    get_type_hints,
+)
 
 import litellm
+from litellm._logging import verbose_logger
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
 from litellm.types.llms.openai import (
     ResponseAPIUsage,
     ResponsesAPIOptionalRequestParams,
+    ResponsesAPIResponse,
 )
-from litellm.types.utils import Usage
+from litellm.types.utils import SpecialEnums, Usage
+
+if TYPE_CHECKING:
+    from litellm.responses.streaming_iterator import BaseResponsesAPIStreamingIterator
+else:
+    BaseResponsesAPIStreamingIterator = Any
 
 
 class ResponsesAPIRequestUtils:

@@ -77,6 +95,66 @@ class ResponsesAPIRequestUtils:
         }
         return cast(ResponsesAPIOptionalRequestParams, filtered_params)
 
+    @staticmethod
+    def _update_responses_api_response_id_with_model_id(
+        responses_api_response: ResponsesAPIResponse,
+        kwargs: Dict[str, Any],
+    ) -> ResponsesAPIResponse:
+        """Update the responses_api_response_id with the model_id"""
+        litellm_metadata: Dict[str, Any] = kwargs.get("litellm_metadata", {}) or {}
+        model_info: Dict[str, Any] = litellm_metadata.get("model_info", {}) or {}
+        model_id = model_info.get("id")
+        updated_id = ResponsesAPIRequestUtils._build_responses_api_response_id(
+            model_id=model_id,
+            response_id=responses_api_response.id,
+        )
+        responses_api_response.id = updated_id
+        return responses_api_response
+
+    @staticmethod
+    def _build_responses_api_response_id(
+        model_id: Optional[str],
+        response_id: str,
+    ) -> str:
+        """Build the responses_api_response_id"""
+        if model_id is None:
+            return response_id
+        assembled_id: str = str(
+            SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR.value
+        ).format(model_id, response_id)
+        base64_encoded_id: str = base64.b64encode(assembled_id.encode("utf-8")).decode(
+            "utf-8"
+        )
+        return f"resp_{base64_encoded_id}"
+
+    @staticmethod
+    def _decode_responses_api_response_id(
+        response_id: str,
+    ) -> Tuple[Optional[str], str]:
+        """
+        Decode the responses_api_response_id
+
+        Returns:
+            Tuple of model_id, response_id (from upstream provider)
+        """
+        try:
+            # Remove prefix and decode
+            cleaned_id = response_id.replace("resp_", "")
+            decoded_id = base64.b64decode(cleaned_id.encode("utf-8")).decode("utf-8")
+
+            # Parse components using known prefixes
+            if ";" not in decoded_id:
+                return None, response_id
+
+            model_part, response_part = decoded_id.split(";", 1)
+            model_id = model_part.replace("litellm:model_id:", "")
+            decoded_response_id = response_part.replace("response_id:", "")
+
+            return model_id, decoded_response_id
+        except Exception as e:
+            verbose_logger.debug(f"Error decoding response_id '{response_id}': {e}")
+            return None, response_id
+
+
 class ResponseAPILoggingUtils:
     @staticmethod

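A minimal, dependency-free round-trip of the helpers above, re-implemented with only the standard library so the sketch runs on its own (function names here are illustrative, not the litellm API): an id produced by the builder decodes back to the (model_id, provider response id) pair, while an id that was never wrapped falls through unchanged.

import base64
from typing import Optional, Tuple

FMT = "litellm:model_id:{};response_id:{}"  # same shape as the new SpecialEnums value


def build_id(model_id: Optional[str], response_id: str) -> str:
    if model_id is None:
        return response_id
    assembled = FMT.format(model_id, response_id)
    return "resp_" + base64.b64encode(assembled.encode("utf-8")).decode("utf-8")


def decode_id(response_id: str) -> Tuple[Optional[str], str]:
    try:
        decoded = base64.b64decode(response_id.replace("resp_", "").encode("utf-8")).decode("utf-8")
        if ";" not in decoded:
            return None, response_id
        model_part, response_part = decoded.split(";", 1)
        return model_part.replace("litellm:model_id:", ""), response_part.replace("response_id:", "")
    except Exception:
        # not base64 / not litellm-wrapped: hand the id back untouched
        return None, response_id


wrapped = build_id("azure-eu-1", "resp_abc123")
assert decode_id(wrapped) == ("azure-eu-1", "resp_abc123")
assert decode_id("resp_not_wrapped") == (None, "resp_not_wrapped")  # plain provider ids pass through
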
@@ -769,9 +769,7 @@ class Router:
                         model_list=self.model_list,
                     )
                 elif pre_call_check == "responses_api_deployment_check":
-                    _callback = ResponsesApiDeploymentCheck(
-                        cache=self.cache,
-                    )
+                    _callback = ResponsesApiDeploymentCheck()
                 if _callback is not None:
                     litellm.logging_callback_manager.add_litellm_callback(_callback)
 
@@ -12,21 +12,12 @@ If previous_response_id is provided, route to the deployment that returned the previous response
 
 from typing import List, Optional
 
-from litellm import verbose_logger
-from litellm.caching.dual_cache import DualCache
 from litellm.integrations.custom_logger import CustomLogger, Span
+from litellm.responses.utils import ResponsesAPIRequestUtils
 from litellm.types.llms.openai import AllMessageValues
-from litellm.types.utils import CallTypes, StandardLoggingPayload
 
 
 class ResponsesApiDeploymentCheck(CustomLogger):
-    RESPONSES_API_RESPONSE_MODEL_ID_CACHE_KEY = (
-        "litellm_responses_api_response_model_id"
-    )
-
-    def __init__(self, cache: DualCache):
-        self.cache = cache
-
     async def async_filter_deployments(
         self,
         model: str,

@@ -40,8 +31,10 @@ class ResponsesApiDeploymentCheck(CustomLogger):
         if previous_response_id is None:
             return healthy_deployments
 
-        model_id = await self.async_get_response_id_from_cache(
-            response_id=previous_response_id,
+        model_id, response_id = (
+            ResponsesAPIRequestUtils._decode_responses_api_response_id(
+                response_id=previous_response_id,
+            )
         )
         if model_id is None:
             return healthy_deployments

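Roughly how the simplified check then uses the decoded value: since the deployment's model_id is now recoverable from previous_response_id itself, pinning the request is a pure lookup over the healthy deployments with no cache read. A hedged sketch, with plain dicts standing in for the router's deployment objects and an illustrative function name:

from typing import Any, Dict, List, Optional


def filter_for_previous_response(
    healthy_deployments: List[Dict[str, Any]],
    decoded_model_id: Optional[str],
) -> List[Dict[str, Any]]:
    """Sketch: route to the deployment that produced the previous response, if still healthy."""
    if decoded_model_id is None:  # nothing encoded in the id -> no restriction
        return healthy_deployments
    for deployment in healthy_deployments:
        if deployment.get("model_info", {}).get("id") == decoded_model_id:
            return [deployment]
    return healthy_deployments  # deployment gone -> fall back to normal routing


deployments = [
    {"model_name": "gpt-4o", "model_info": {"id": "azure-eu-1"}},
    {"model_name": "gpt-4o", "model_info": {"id": "azure-us-2"}},
]
assert filter_for_previous_response(deployments, "azure-us-2") == [deployments[1]]
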
@@ -51,59 +44,3 @@ class ResponsesApiDeploymentCheck(CustomLogger):
                 return [deployment]
 
         return healthy_deployments
-
-    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
-        standard_logging_object: Optional[StandardLoggingPayload] = kwargs.get(
-            "standard_logging_object", None
-        )
-
-        if standard_logging_object is None:
-            return
-
-        call_type = standard_logging_object["call_type"]
-
-        if (
-            call_type != CallTypes.responses.value
-            and call_type != CallTypes.aresponses.value
-        ):  # only use response id checks for responses api
-            verbose_logger.debug(
-                "litellm.router_utils.pre_call_checks.responses_api_deployment_check: skipping adding response_id to cache, CALL TYPE IS NOT RESPONSES"
-            )
-            return
-
-        response_id = getattr(response_obj, "id", None)
-        model_id = standard_logging_object["model_id"]
-
-        if response_id is None or model_id is None:
-            verbose_logger.debug(
-                "litellm.router_utils.pre_call_checks.responses_api_deployment_check: skipping adding response_id to cache, RESPONSE ID OR MODEL ID IS NONE"
-            )
-            return
-
-        await self.async_add_response_id_to_cache(
-            response_id=response_id,
-            model_id=model_id,
-        )
-
-        return
-
-    async def async_add_response_id_to_cache(
-        self,
-        response_id: str,
-        model_id: str,
-    ):
-        await self.cache.async_set_cache(
-            key=self.get_cache_key_for_response_id(response_id),
-            value=model_id,
-        )
-
-    async def async_get_response_id_from_cache(self, response_id: str) -> Optional[str]:
-        cache_value = await self.cache.async_get_cache(
-            key=self.get_cache_key_for_response_id(response_id),
-        )
-        if cache_value is None:
-            return None
-        return str(cache_value)
-
-    def get_cache_key_for_response_id(self, response_id: str) -> str:
-        return f"{self.RESPONSES_API_RESPONSE_MODEL_ID_CACHE_KEY}:{response_id}"

@@ -2254,6 +2254,8 @@ class SpecialEnums(Enum):
     LITELM_MANAGED_FILE_ID_PREFIX = "litellm_proxy"
     LITELLM_MANAGED_FILE_COMPLETE_STR = "litellm_proxy:{};unified_id,{}"
 
+    LITELLM_MANAGED_RESPONSE_COMPLETE_STR = "litellm:model_id:{};response_id:{}"
+
 
 LLMResponseTypes = Union[
     ModelResponse, EmbeddingResponse, ImageResponse, OpenAIFileObject
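For reference, what the new format string expands to before it is base64-encoded (a small illustrative snippet; only the enum value is taken verbatim from the diff, the model_id and response_id are made up):

LITELLM_MANAGED_RESPONSE_COMPLETE_STR = "litellm:model_id:{};response_id:{}"

assembled = LITELLM_MANAGED_RESPONSE_COMPLETE_STR.format("azure-eu-1", "resp_abc123")
print(assembled)  # litellm:model_id:azure-eu-1;response_id:resp_abc123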