[Feat] Support for all litellm providers on Responses API (works with Codex) - Anthropic, Bedrock API, VertexAI, Ollama (#10132)

* transform request * basic handler for LiteLLMCompletionTransformationHandler * complete transform litellm to responses api * fixes to test * fix stream=True * fix streaming iterator * fixes for transformation * fixes for anthropic codex support * fix pass response_api_optional_params * test anthropic responses api tools * update responses types * working codex with litellm * add session handler * fixes streaming iterator * fix handler * add litellm codex example * fix code quality * test fix * docs litellm codex * litellm codexdoc * docs openai codex with litellm * docs litellm openai codex * litellm codex * linting fixes for transforming responses API * fix import error * fix responses api test * add sync iterator support for responses api
2025-04-26 03:04:13 +00:00 · 2025-04-18 19:53:59 -07:00 · 2025-04-18 19:53:59 -07:00 · 3d5022bd79
commit 3d5022bd79
parent 3e87ec4f16
14 changed files with 1282 additions and 53 deletions
--- a/litellm/responses/litellm_completion_transformation/handler.py
+++ b/litellm/responses/litellm_completion_transformation/handler.py
@ -0,0 +1,115 @@
+"""
+Handler for transforming responses api requests to litellm.completion requests
+"""
+
+from typing import Any, Coroutine, Optional, Union
+
+import litellm
+from litellm.responses.litellm_completion_transformation.streaming_iterator import (
+    LiteLLMCompletionStreamingIterator,
+)
+from litellm.responses.litellm_completion_transformation.transformation import (
+    LiteLLMCompletionResponsesConfig,
+)
+from litellm.responses.streaming_iterator import BaseResponsesAPIStreamingIterator
+from litellm.types.llms.openai import (
+    ResponseInputParam,
+    ResponsesAPIOptionalRequestParams,
+    ResponsesAPIResponse,
+)
+from litellm.types.utils import ModelResponse
+
+
+class LiteLLMCompletionTransformationHandler:
+    """
+    Serves Responses API requests through litellm's chat completion entry points.
+
+    The incoming request is transformed into keyword arguments for
+    `litellm.completion` / `litellm.acompletion`, and the completion result is
+    transformed back into a Responses API response or wrapped in a
+    `LiteLLMCompletionStreamingIterator`.
+    """
+
+    def response_api_handler(
+        self,
+        model: str,
+        input: Union[str, ResponseInputParam],
+        responses_api_request: ResponsesAPIOptionalRequestParams,
+        custom_llm_provider: Optional[str] = None,
+        _is_async: bool = False,
+        stream: Optional[bool] = None,
+        **kwargs,
+    ) -> Union[
+        ResponsesAPIResponse,
+        BaseResponsesAPIStreamingIterator,
+        Coroutine[
+            Any, Any, Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]
+        ],
+    ]:
+        """
+        Handle a Responses API request by delegating to `litellm.completion`.
+
+        Args:
+            model: Model name, forwarded to the request transformation.
+            input: Responses API input, forwarded to the request transformation
+                and to the response transformation.
+            responses_api_request: Optional Responses API request params,
+                forwarded to both transformations.
+            custom_llm_provider: Forwarded to the request transformation.
+            _is_async: When True, the un-awaited coroutine returned by
+                `async_response_api_handler` is returned instead of calling
+                `litellm.completion`.
+            stream: Forwarded to the request transformation.
+            **kwargs: Extra keyword arguments. They are passed to the request
+                transformation and also to the completion call.
+
+        Returns:
+            A `ResponsesAPIResponse` when litellm returns a `ModelResponse`, a
+            `LiteLLMCompletionStreamingIterator` when litellm returns a
+            `CustomStreamWrapper`, or a coroutine resolving to one of those when
+            `_is_async` is True.
+        """
+        litellm_completion_request: dict = (
+            LiteLLMCompletionResponsesConfig.transform_responses_api_request_to_chat_completion_request(
+                model=model,
+                input=input,
+                responses_api_request=responses_api_request,
+                custom_llm_provider=custom_llm_provider,
+                stream=stream,
+                **kwargs,
+            )
+        )
+
+        if _is_async:
+            return self.async_response_api_handler(
+                litellm_completion_request=litellm_completion_request,
+                request_input=input,
+                responses_api_request=responses_api_request,
+                **kwargs,
+            )
+
+        litellm_completion_response: Union[
+            ModelResponse, litellm.CustomStreamWrapper
+        ] = litellm.completion(
+            **self._merge_completion_args(
+                litellm_completion_request=litellm_completion_request,
+                extra_kwargs=kwargs,
+            )
+        )
+
+        return self._transform_completion_response(
+            litellm_completion_response=litellm_completion_response,
+            request_input=input,
+            responses_api_request=responses_api_request,
+        )
+
+    async def async_response_api_handler(
+        self,
+        litellm_completion_request: dict,
+        request_input: Union[str, ResponseInputParam],
+        responses_api_request: ResponsesAPIOptionalRequestParams,
+        **kwargs,
+    ) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
+        """
+        Async counterpart of `response_api_handler`, using `litellm.acompletion`.
+
+        Args:
+            litellm_completion_request: Already-transformed keyword arguments
+                for the completion call.
+            request_input: Original Responses API input, forwarded to the
+                response transformation.
+            responses_api_request: Optional Responses API request params,
+                forwarded to the response transformation.
+            **kwargs: Extra keyword arguments passed to the completion call.
+
+        Returns:
+            A `ResponsesAPIResponse` when litellm returns a `ModelResponse`, or a
+            `LiteLLMCompletionStreamingIterator` when litellm returns a
+            `CustomStreamWrapper`.
+        """
+        litellm_completion_response: Union[
+            ModelResponse, litellm.CustomStreamWrapper
+        ] = await litellm.acompletion(
+            **self._merge_completion_args(
+                litellm_completion_request=litellm_completion_request,
+                extra_kwargs=kwargs,
+            )
+        )
+
+        return self._transform_completion_response(
+            litellm_completion_response=litellm_completion_response,
+            request_input=request_input,
+            responses_api_request=responses_api_request,
+        )
+
+    @staticmethod
+    def _merge_completion_args(
+        litellm_completion_request: dict,
+        extra_kwargs: dict,
+    ) -> dict:
+        """
+        Build one keyword-argument dict for the completion call.
+
+        Unpacking both mappings directly into the call raises `TypeError` as
+        soon as they share a key. Merging first avoids that; on a shared key the
+        value from the transformed request is the one that is kept.
+        """
+        return {**extra_kwargs, **litellm_completion_request}
+
+    @staticmethod
+    def _transform_completion_response(
+        litellm_completion_response: Union[ModelResponse, litellm.CustomStreamWrapper],
+        request_input: Union[str, ResponseInputParam],
+        responses_api_request: ResponsesAPIOptionalRequestParams,
+    ) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
+        """
+        Convert a litellm completion result into its Responses API form.
+
+        Shared by the sync and async handlers so both paths stay identical.
+        """
+        if isinstance(litellm_completion_response, ModelResponse):
+            responses_api_response: ResponsesAPIResponse = (
+                LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
+                    chat_completion_response=litellm_completion_response,
+                    request_input=request_input,
+                    responses_api_request=responses_api_request,
+                )
+            )
+
+            return responses_api_response
+
+        elif isinstance(litellm_completion_response, litellm.CustomStreamWrapper):
+            return LiteLLMCompletionStreamingIterator(
+                litellm_custom_stream_wrapper=litellm_completion_response,
+                request_input=request_input,
+                responses_api_request=responses_api_request,
+            )
+        # NOTE(review): any other response type falls through and yields None,
+        # which the annotated return type does not cover. Behavior is kept
+        # as-is here; confirm whether raising would be preferable.