simplify ResponsesApiDeploymentCheck

Ishaan Jaff 2025-04-21 19:38:55 -07:00
parent 39610d4888
commit a582a067f4
5 changed files with 102 additions and 73 deletions

View file

@@ -116,6 +116,13 @@ async def aresponses(
response = await init_response
else:
response = init_response
# Update the responses_api_response_id with the model_id
if isinstance(response, ResponsesAPIResponse):
response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
responses_api_response=response,
kwargs=kwargs,
)
return response
except Exception as e:
raise litellm.exception_type(
@@ -248,6 +255,13 @@ def responses(
),
)
# Update the responses_api_response_id with the model_id
if isinstance(response, ResponsesAPIResponse):
response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
responses_api_response=response,
kwargs=kwargs,
)
return response
except Exception as e:
raise litellm.exception_type(

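The two hunks above apply the same post-processing to the async and sync entry points: once a ResponsesAPIResponse comes back, its id is rewritten to embed the routed deployment's model_id. A minimal usage sketch of what that enables (not part of the commit; the model alias and prompts are placeholders, and previous_response_id is assumed to be passed straight through to the Responses API call):

# Illustrative sketch only. After this change, the id returned by
# litellm.aresponses()/litellm.responses() embeds the deployment's model_id,
# so a follow-up request that passes previous_response_id can be pinned to
# the same deployment by the router.
import asyncio
import litellm

async def demo() -> None:
    first = await litellm.aresponses(
        model="gpt-4o-mini",  # placeholder model / deployment alias
        input="Write a haiku about the sea.",
    )
    # first.id now looks like "resp_<base64('litellm:model_id:<id>;response_id:<provider id>')>"
    follow_up = await litellm.aresponses(
        model="gpt-4o-mini",
        input="Now translate it to French.",
        previous_response_id=first.id,  # router decodes the embedded model_id from this
    )
    print(follow_up.id)

asyncio.run(demo())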
View file

@@ -1,12 +1,30 @@
from typing import Any, Dict, Union, cast, get_type_hints
import base64
from typing import (
TYPE_CHECKING,
Any,
Coroutine,
Dict,
Optional,
Tuple,
Union,
cast,
get_type_hints,
)
import litellm
from litellm._logging import verbose_logger
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.types.llms.openai import (
ResponseAPIUsage,
ResponsesAPIOptionalRequestParams,
ResponsesAPIResponse,
)
from litellm.types.utils import Usage
from litellm.types.utils import SpecialEnums, Usage
if TYPE_CHECKING:
from litellm.responses.streaming_iterator import BaseResponsesAPIStreamingIterator
else:
BaseResponsesAPIStreamingIterator = Any
class ResponsesAPIRequestUtils:
@@ -77,6 +95,66 @@ class ResponsesAPIRequestUtils:
}
return cast(ResponsesAPIOptionalRequestParams, filtered_params)
@staticmethod
def _update_responses_api_response_id_with_model_id(
responses_api_response: ResponsesAPIResponse,
kwargs: Dict[str, Any],
) -> ResponsesAPIResponse:
"""Update the responses_api_response_id with the model_id"""
litellm_metadata: Dict[str, Any] = kwargs.get("litellm_metadata", {}) or {}
model_info: Dict[str, Any] = litellm_metadata.get("model_info", {}) or {}
model_id = model_info.get("id")
updated_id = ResponsesAPIRequestUtils._build_responses_api_response_id(
model_id=model_id,
response_id=responses_api_response.id,
)
responses_api_response.id = updated_id
return responses_api_response
@staticmethod
def _build_responses_api_response_id(
model_id: Optional[str],
response_id: str,
) -> str:
"""Build the responses_api_response_id"""
if model_id is None:
return response_id
assembled_id: str = str(
SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR.value
).format(model_id, response_id)
base64_encoded_id: str = base64.b64encode(assembled_id.encode("utf-8")).decode(
"utf-8"
)
return f"resp_{base64_encoded_id}"
@staticmethod
def _decode_responses_api_response_id(
response_id: str,
) -> Tuple[Optional[str], str]:
"""
Decode the responses_api_response_id
Returns:
Tuple of model_id, response_id (from upstream provider)
"""
try:
# Remove prefix and decode
cleaned_id = response_id.replace("resp_", "")
decoded_id = base64.b64decode(cleaned_id.encode("utf-8")).decode("utf-8")
# Parse components using known prefixes
if ";" not in decoded_id:
return None, response_id
model_part, response_part = decoded_id.split(";", 1)
model_id = model_part.replace("litellm:model_id:", "")
decoded_response_id = response_part.replace("response_id:", "")
return model_id, decoded_response_id
except Exception as e:
verbose_logger.debug(f"Error decoding response_id '{response_id}': {e}")
return None, response_id
class ResponseAPILoggingUtils:
@staticmethod

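For reference, a standalone round-trip of the id scheme added above, mirroring SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR; the sample values are invented:

# Standalone sketch of the encode/decode scheme; sample values are made up.
import base64

model_id = "deployment-1234"
provider_response_id = "resp_abc123"

# Encode: fill "litellm:model_id:{};response_id:{}", base64-encode, add "resp_" prefix
assembled = "litellm:model_id:{};response_id:{}".format(model_id, provider_response_id)
encoded_id = "resp_" + base64.b64encode(assembled.encode("utf-8")).decode("utf-8")

# Decode: strip the prefix, base64-decode, split on the first ";"
decoded = base64.b64decode(encoded_id.replace("resp_", "").encode("utf-8")).decode("utf-8")
model_part, response_part = decoded.split(";", 1)
assert model_part.replace("litellm:model_id:", "") == model_id
assert response_part.replace("response_id:", "") == provider_response_id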
View file

@@ -769,9 +769,7 @@ class Router:
model_list=self.model_list,
)
elif pre_call_check == "responses_api_deployment_check":
_callback = ResponsesApiDeploymentCheck(
cache=self.cache,
)
_callback = ResponsesApiDeploymentCheck()
if _callback is not None:
litellm.logging_callback_manager.add_litellm_callback(_callback)

View file

@@ -12,21 +12,12 @@ If previous_response_id is provided, route to the deployment that returned the p
from typing import List, Optional
from litellm import verbose_logger
from litellm.caching.dual_cache import DualCache
from litellm.integrations.custom_logger import CustomLogger, Span
from litellm.responses.utils import ResponsesAPIRequestUtils
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import CallTypes, StandardLoggingPayload
class ResponsesApiDeploymentCheck(CustomLogger):
RESPONSES_API_RESPONSE_MODEL_ID_CACHE_KEY = (
"litellm_responses_api_response_model_id"
)
def __init__(self, cache: DualCache):
self.cache = cache
async def async_filter_deployments(
self,
model: str,
@@ -40,8 +31,10 @@ class ResponsesApiDeploymentCheck(CustomLogger):
if previous_response_id is None:
return healthy_deployments
model_id = await self.async_get_response_id_from_cache(
response_id=previous_response_id,
model_id, response_id = (
ResponsesAPIRequestUtils._decode_responses_api_response_id(
response_id=previous_response_id,
)
)
if model_id is None:
return healthy_deployments
@@ -51,59 +44,3 @@
return [deployment]
return healthy_deployments
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
standard_logging_object: Optional[StandardLoggingPayload] = kwargs.get(
"standard_logging_object", None
)
if standard_logging_object is None:
return
call_type = standard_logging_object["call_type"]
if (
call_type != CallTypes.responses.value
and call_type != CallTypes.aresponses.value
): # only use response id checks for responses api
verbose_logger.debug(
"litellm.router_utils.pre_call_checks.responses_api_deployment_check: skipping adding response_id to cache, CALL TYPE IS NOT RESPONSES"
)
return
response_id = getattr(response_obj, "id", None)
model_id = standard_logging_object["model_id"]
if response_id is None or model_id is None:
verbose_logger.debug(
"litellm.router_utils.pre_call_checks.responses_api_deployment_check: skipping adding response_id to cache, RESPONSE ID OR MODEL ID IS NONE"
)
return
await self.async_add_response_id_to_cache(
response_id=response_id,
model_id=model_id,
)
return
async def async_add_response_id_to_cache(
self,
response_id: str,
model_id: str,
):
await self.cache.async_set_cache(
key=self.get_cache_key_for_response_id(response_id),
value=model_id,
)
async def async_get_response_id_from_cache(self, response_id: str) -> Optional[str]:
cache_value = await self.cache.async_get_cache(
key=self.get_cache_key_for_response_id(response_id),
)
if cache_value is None:
return None
return str(cache_value)
def get_cache_key_for_response_id(self, response_id: str) -> str:
return f"{self.RESPONSES_API_RESPONSE_MODEL_ID_CACHE_KEY}:{response_id}"

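Taken together, the check is now stateless: instead of writing the model_id to the DualCache on success and reading it back on the next request, it derives the model_id directly from previous_response_id. A hedged sketch of the resulting filtering logic (the deployment dicts and the decode callable are assumptions standing in for the router's real objects):

# Sketch of the stateless filtering flow after this commit.
from typing import Any, Callable, Dict, List, Optional, Tuple

def pick_deployments(
    healthy_deployments: List[Dict[str, Any]],
    previous_response_id: Optional[str],
    decode_id: Callable[[str], Tuple[Optional[str], str]],
) -> List[Dict[str, Any]]:
    if previous_response_id is None:
        return healthy_deployments  # nothing to pin against
    model_id, _provider_response_id = decode_id(previous_response_id)
    if model_id is None:
        return healthy_deployments  # not one of our encoded ids; leave routing untouched
    for deployment in healthy_deployments:
        if deployment.get("model_info", {}).get("id") == model_id:
            return [deployment]  # pin to the deployment that produced the prior response
    return healthy_deployments

Passing ResponsesAPIRequestUtils._decode_responses_api_response_id as decode_id roughly reproduces the behavior of async_filter_deployments shown in the hunk above.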
View file

@@ -2254,6 +2254,8 @@ class SpecialEnums(Enum):
LITELM_MANAGED_FILE_ID_PREFIX = "litellm_proxy"
LITELLM_MANAGED_FILE_COMPLETE_STR = "litellm_proxy:{};unified_id,{}"
LITELLM_MANAGED_RESPONSE_COMPLETE_STR = "litellm:model_id:{};response_id:{}"
LLMResponseTypes = Union[
ModelResponse, EmbeddingResponse, ImageResponse, OpenAIFileObject