diff --git a/docs/my-website/docs/tutorials/openai_codex.md b/docs/my-website/docs/tutorials/openai_codex.md
new file mode 100644
index 0000000000..941f197b95
--- /dev/null
+++ b/docs/my-website/docs/tutorials/openai_codex.md
@@ -0,0 +1,139 @@
+import Image from '@theme/IdealImage';
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# Using LiteLLM with OpenAI Codex
+
+This guide walks you through connecting OpenAI Codex to LiteLLM. Using LiteLLM with Codex allows teams to:
+- Access 100+ LLMs through the Codex interface
+- Use powerful models like Gemini and Claude without changing your Codex workflow
+- Track spend and usage with LiteLLM's built-in analytics
+- Control model access with virtual keys
+
+
+
+## Quickstart
+
+Before you begin, set up the LiteLLM Proxy by following the [LiteLLM Getting Started Guide](../proxy/docker_quick_start.md).
+
+## 1. Install OpenAI Codex
+
+Install the OpenAI Codex CLI tool globally with npm or yarn:
+
+<Tabs>
+<TabItem value="npm" label="npm">
+
+```bash showLineNumbers
+npm i -g @openai/codex
+```
+
+</TabItem>
+<TabItem value="yarn" label="yarn">
+
+```bash showLineNumbers
+yarn global add @openai/codex
+```
+
+</TabItem>
+</Tabs>
+
+## 2. Start LiteLLM Proxy
+
+<Tabs>
+<TabItem value="docker" label="Docker">
+
+```bash showLineNumbers
+docker run \
+ -v $(pwd)/litellm_config.yaml:/app/config.yaml \
+ -p 4000:4000 \
+ ghcr.io/berriai/litellm:main-latest \
+ --config /app/config.yaml
+```
+
+</TabItem>
+<TabItem value="cli" label="LiteLLM CLI">
+
+```bash showLineNumbers
+litellm --config /path/to/config.yaml
+```
+
+</TabItem>
+</Tabs>
+
+LiteLLM should now be running on [http://localhost:4000](http://localhost:4000)
+
+## 3. Configure LiteLLM for Model Routing
+
+Ensure your LiteLLM Proxy is properly configured to route to your desired models. Create the `litellm_config.yaml` file referenced in Step 2 with the following content:
+
+```yaml showLineNumbers
+model_list:
+ - model_name: o3-mini
+ litellm_params:
+ model: openai/o3-mini
+ api_key: os.environ/OPENAI_API_KEY
+ - model_name: claude-3-7-sonnet-latest
+ litellm_params:
+ model: anthropic/claude-3-7-sonnet-latest
+ api_key: os.environ/ANTHROPIC_API_KEY
+ - model_name: gemini-2.0-flash
+ litellm_params:
+ model: gemini/gemini-2.0-flash
+ api_key: os.environ/GEMINI_API_KEY
+
+litellm_settings:
+ drop_params: true
+```
+
+This configuration enables routing to specific OpenAI, Anthropic, and Gemini models with explicit names.
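+
+To verify the routing before wiring up Codex, you can send a test request directly to the proxy's OpenAI-compatible `/v1/chat/completions` endpoint. This quick sanity check assumes the proxy is running locally on port 4000 and that `sk-1234` is your LiteLLM key:
+
+```bash showLineNumbers
+curl http://localhost:4000/v1/chat/completions \
+  -H "Authorization: Bearer sk-1234" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gemini-2.0-flash",
+    "messages": [{"role": "user", "content": "Hello from LiteLLM"}]
+  }'
+```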
+
+## 4. Configure Codex to Use LiteLLM Proxy
+
+Set the required environment variables to point Codex to your LiteLLM Proxy:
+
+```bash
+# Point to your LiteLLM Proxy server
+export OPENAI_BASE_URL=http://0.0.0.0:4000
+
+# Use your LiteLLM API key (if you've set up authentication)
+export OPENAI_API_KEY="sk-1234"
+```
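+
+Instead of reusing the master key, you can optionally generate a dedicated virtual key for Codex and export that as `OPENAI_API_KEY`. A minimal sketch, assuming `sk-1234` is your master key and you want to restrict Codex to the models configured above:
+
+```bash showLineNumbers
+curl http://localhost:4000/key/generate \
+  -H "Authorization: Bearer sk-1234" \
+  -H "Content-Type: application/json" \
+  -d '{"models": ["o3-mini", "claude-3-7-sonnet-latest", "gemini-2.0-flash"]}'
+```
+
+See [LiteLLM Virtual Keys and Authentication](../proxy/virtual_keys.md) for budgets, rate limits, and other key options.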
+
+## 5. Run Codex with Gemini
+
+With everything configured, you can now run Codex with Gemini:
+
+```bash showLineNumbers
+codex --model gemini-2.0-flash --full-auto
+```
+
+<Image img={require('../../img/litellm_codex.gif')} />
+
+The `--full-auto` flag runs Codex in its most autonomous approval mode, letting it edit files and execute commands in a sandbox without asking for confirmation at each step.
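+
+You can also pass an initial task as a prompt on the command line. The prompt below is only an illustration:
+
+```bash showLineNumbers
+codex --model gemini-2.0-flash --full-auto "create a simple hello world HTML page"
+```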
+
+## 6. Advanced Options
+
+### Using Different Models
+
+You can use any model configured in your LiteLLM proxy:
+
+```bash
+# Use Claude models
+codex --model claude-3-7-sonnet-latest
+
+# Use Google AI Studio Gemini models
+codex --model gemini-2.0-flash
+```
+
+## Troubleshooting
+
+- If you encounter connection issues, ensure your LiteLLM Proxy is running and reachable at the URL set in `OPENAI_BASE_URL` (a quick check is shown below)
+- Verify your LiteLLM API key is valid if you're using authentication
+- Check that your model routing configuration is correct
+- For model-specific errors, ensure the model is properly configured in your LiteLLM setup
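+
+As a quick connectivity check, list the models the proxy exposes via its OpenAI-compatible `/v1/models` endpoint (assuming the proxy runs locally and `sk-1234` is your key):
+
+```bash showLineNumbers
+curl http://localhost:4000/v1/models \
+  -H "Authorization: Bearer sk-1234"
+```
+
+If this returns your configured model names, Codex should be able to reach the proxy as well.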
+
+## Additional Resources
+
+- [LiteLLM Docker Quick Start Guide](../proxy/docker_quick_start.md)
+- [OpenAI Codex GitHub Repository](https://github.com/openai/codex)
+- [LiteLLM Virtual Keys and Authentication](../proxy/virtual_keys.md)
diff --git a/docs/my-website/img/litellm_codex.gif b/docs/my-website/img/litellm_codex.gif
new file mode 100644
index 0000000000..04332b5053
Binary files /dev/null and b/docs/my-website/img/litellm_codex.gif differ
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index fdf2019cc2..bc9182305a 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -443,6 +443,7 @@ const sidebars = {
label: "Tutorials",
items: [
"tutorials/openweb_ui",
+ "tutorials/openai_codex",
"tutorials/msft_sso",
"tutorials/prompt_caching",
"tutorials/tag_management",
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 17658df903..d15cd9383e 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -1,6 +1,13 @@
model_list:
- - model_name: fake-openai-endpoint
+ - model_name: openai/*
litellm_params:
- model: openai/fake
- api_key: fake-key
- api_base: https://exampleopenaiendpoint-production.up.railway.app/
+ model: openai/*
+ - model_name: anthropic/*
+ litellm_params:
+ model: anthropic/*
+ - model_name: gemini/*
+ litellm_params:
+ model: gemini/*
+litellm_settings:
+ drop_params: true
+
diff --git a/litellm/responses/litellm_completion_transformation/handler.py b/litellm/responses/litellm_completion_transformation/handler.py
new file mode 100644
index 0000000000..3580fe5e44
--- /dev/null
+++ b/litellm/responses/litellm_completion_transformation/handler.py
@@ -0,0 +1,115 @@
+"""
+Handler for transforming responses api requests to litellm.completion requests
+"""
+
+from typing import Any, Coroutine, Optional, Union
+
+import litellm
+from litellm.responses.litellm_completion_transformation.streaming_iterator import (
+ LiteLLMCompletionStreamingIterator,
+)
+from litellm.responses.litellm_completion_transformation.transformation import (
+ LiteLLMCompletionResponsesConfig,
+)
+from litellm.responses.streaming_iterator import BaseResponsesAPIStreamingIterator
+from litellm.types.llms.openai import (
+ ResponseInputParam,
+ ResponsesAPIOptionalRequestParams,
+ ResponsesAPIResponse,
+)
+from litellm.types.utils import ModelResponse
+
+
+class LiteLLMCompletionTransformationHandler:
+
+ def response_api_handler(
+ self,
+ model: str,
+ input: Union[str, ResponseInputParam],
+ responses_api_request: ResponsesAPIOptionalRequestParams,
+ custom_llm_provider: Optional[str] = None,
+ _is_async: bool = False,
+ stream: Optional[bool] = None,
+ **kwargs,
+ ) -> Union[
+ ResponsesAPIResponse,
+ BaseResponsesAPIStreamingIterator,
+ Coroutine[
+ Any, Any, Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]
+ ],
+ ]:
+ litellm_completion_request: dict = (
+ LiteLLMCompletionResponsesConfig.transform_responses_api_request_to_chat_completion_request(
+ model=model,
+ input=input,
+ responses_api_request=responses_api_request,
+ custom_llm_provider=custom_llm_provider,
+ stream=stream,
+ **kwargs,
+ )
+ )
+
+ if _is_async:
+ return self.async_response_api_handler(
+ litellm_completion_request=litellm_completion_request,
+ request_input=input,
+ responses_api_request=responses_api_request,
+ **kwargs,
+ )
+
+ litellm_completion_response: Union[
+ ModelResponse, litellm.CustomStreamWrapper
+ ] = litellm.completion(
+ **litellm_completion_request,
+ **kwargs,
+ )
+
+ if isinstance(litellm_completion_response, ModelResponse):
+ responses_api_response: ResponsesAPIResponse = (
+ LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
+ chat_completion_response=litellm_completion_response,
+ request_input=input,
+ responses_api_request=responses_api_request,
+ )
+ )
+
+ return responses_api_response
+
+ elif isinstance(litellm_completion_response, litellm.CustomStreamWrapper):
+ return LiteLLMCompletionStreamingIterator(
+ litellm_custom_stream_wrapper=litellm_completion_response,
+ request_input=input,
+ responses_api_request=responses_api_request,
+ )
+
+ async def async_response_api_handler(
+ self,
+ litellm_completion_request: dict,
+ request_input: Union[str, ResponseInputParam],
+ responses_api_request: ResponsesAPIOptionalRequestParams,
+ **kwargs,
+ ) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
+ litellm_completion_response: Union[
+ ModelResponse, litellm.CustomStreamWrapper
+ ] = await litellm.acompletion(
+ **litellm_completion_request,
+ **kwargs,
+ )
+
+ if isinstance(litellm_completion_response, ModelResponse):
+ responses_api_response: ResponsesAPIResponse = (
+ LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
+ chat_completion_response=litellm_completion_response,
+ request_input=request_input,
+ responses_api_request=responses_api_request,
+ )
+ )
+
+ return responses_api_response
+
+ elif isinstance(litellm_completion_response, litellm.CustomStreamWrapper):
+ return LiteLLMCompletionStreamingIterator(
+ litellm_custom_stream_wrapper=litellm_completion_response,
+ request_input=request_input,
+ responses_api_request=responses_api_request,
+ )
diff --git a/litellm/responses/litellm_completion_transformation/session_handler.py b/litellm/responses/litellm_completion_transformation/session_handler.py
new file mode 100644
index 0000000000..b114611c26
--- /dev/null
+++ b/litellm/responses/litellm_completion_transformation/session_handler.py
@@ -0,0 +1,59 @@
+"""
+Responses API has previous_response_id, which is the id of the previous response.
+
+LiteLLM needs to maintain a cache of the previous response input, output, previous_response_id, and model.
+
+This class handles that cache.
+"""
+
+from typing import List, Optional, Tuple, Union
+
+from typing_extensions import TypedDict
+
+from litellm.caching import InMemoryCache
+from litellm.types.llms.openai import ResponseInputParam, ResponsesAPIResponse
+
+RESPONSES_API_PREVIOUS_RESPONSES_CACHE = InMemoryCache()
+MAX_PREV_SESSION_INPUTS = 50
+
+
+class ResponsesAPISessionElement(TypedDict, total=False):
+ input: Union[str, ResponseInputParam]
+ output: ResponsesAPIResponse
+ response_id: str
+ previous_response_id: Optional[str]
+
+
+class SessionHandler:
+
+ def add_completed_response_to_cache(
+ self, response_id: str, session_element: ResponsesAPISessionElement
+ ):
+ RESPONSES_API_PREVIOUS_RESPONSES_CACHE.set_cache(
+ key=response_id, value=session_element
+ )
+
+ def get_chain_of_previous_input_output_pairs(
+ self, previous_response_id: str
+ ) -> List[Tuple[ResponseInputParam, ResponsesAPIResponse]]:
+ response_api_inputs: List[Tuple[ResponseInputParam, ResponsesAPIResponse]] = []
+ current_previous_response_id = previous_response_id
+
+ count_session_elements = 0
+ while current_previous_response_id:
+ if count_session_elements > MAX_PREV_SESSION_INPUTS:
+ break
+ session_element = RESPONSES_API_PREVIOUS_RESPONSES_CACHE.get_cache(
+ key=current_previous_response_id
+ )
+ if session_element:
+ response_api_inputs.append(
+ (session_element.get("input"), session_element.get("output"))
+ )
+ current_previous_response_id = session_element.get(
+ "previous_response_id"
+ )
+ else:
+ break
+ count_session_elements += 1
+ return response_api_inputs
diff --git a/litellm/responses/litellm_completion_transformation/streaming_iterator.py b/litellm/responses/litellm_completion_transformation/streaming_iterator.py
new file mode 100644
index 0000000000..d970746f89
--- /dev/null
+++ b/litellm/responses/litellm_completion_transformation/streaming_iterator.py
@@ -0,0 +1,110 @@
+from typing import List, Optional, Union
+
+import litellm
+from litellm.main import stream_chunk_builder
+from litellm.responses.litellm_completion_transformation.transformation import (
+ LiteLLMCompletionResponsesConfig,
+)
+from litellm.responses.streaming_iterator import ResponsesAPIStreamingIterator
+from litellm.types.llms.openai import (
+ ResponseCompletedEvent,
+ ResponseInputParam,
+ ResponsesAPIOptionalRequestParams,
+ ResponsesAPIStreamEvents,
+ ResponsesAPIStreamingResponse,
+)
+from litellm.types.utils import (
+ ModelResponse,
+ ModelResponseStream,
+ TextCompletionResponse,
+)
+
+
+class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
+ """
+    Sync and async iterator that collects streamed chat completion chunks and emits a single Responses API completed event.
+ """
+
+ def __init__(
+ self,
+ litellm_custom_stream_wrapper: litellm.CustomStreamWrapper,
+ request_input: Union[str, ResponseInputParam],
+ responses_api_request: ResponsesAPIOptionalRequestParams,
+ ):
+ self.litellm_custom_stream_wrapper: litellm.CustomStreamWrapper = (
+ litellm_custom_stream_wrapper
+ )
+ self.request_input: Union[str, ResponseInputParam] = request_input
+ self.responses_api_request: ResponsesAPIOptionalRequestParams = (
+ responses_api_request
+ )
+ self.collected_chunks: List[ModelResponseStream] = []
+ self.finished: bool = False
+
+ async def __anext__(
+ self,
+ ) -> Union[ResponsesAPIStreamingResponse, ResponseCompletedEvent]:
+ try:
+ while True:
+ if self.finished is True:
+ raise StopAsyncIteration
+ # Get the next chunk from the stream
+ try:
+ chunk = await self.litellm_custom_stream_wrapper.__anext__()
+ self.collected_chunks.append(chunk)
+ except StopAsyncIteration:
+ self.finished = True
+ response_completed_event = self._emit_response_completed_event()
+ if response_completed_event:
+ return response_completed_event
+ else:
+ raise StopAsyncIteration
+
+ except Exception as e:
+ # Handle HTTP errors
+ self.finished = True
+ raise e
+
+ def __iter__(self):
+ return self
+
+ def __next__(
+ self,
+ ) -> Union[ResponsesAPIStreamingResponse, ResponseCompletedEvent]:
+ try:
+ while True:
+ if self.finished is True:
+                    raise StopIteration
+ # Get the next chunk from the stream
+ try:
+ chunk = self.litellm_custom_stream_wrapper.__next__()
+ self.collected_chunks.append(chunk)
+                except StopIteration:
+ self.finished = True
+ response_completed_event = self._emit_response_completed_event()
+ if response_completed_event:
+ return response_completed_event
+ else:
+                        raise StopIteration
+
+ except Exception as e:
+ # Handle HTTP errors
+ self.finished = True
+ raise e
+
+ def _emit_response_completed_event(self) -> Optional[ResponseCompletedEvent]:
+ litellm_model_response: Optional[
+ Union[ModelResponse, TextCompletionResponse]
+ ] = stream_chunk_builder(chunks=self.collected_chunks)
+ if litellm_model_response and isinstance(litellm_model_response, ModelResponse):
+
+ return ResponseCompletedEvent(
+ type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
+ response=LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
+ request_input=self.request_input,
+ chat_completion_response=litellm_model_response,
+ responses_api_request=self.responses_api_request,
+ ),
+ )
+ else:
+ return None
diff --git a/litellm/responses/litellm_completion_transformation/transformation.py b/litellm/responses/litellm_completion_transformation/transformation.py
new file mode 100644
index 0000000000..b1e52eb8f3
--- /dev/null
+++ b/litellm/responses/litellm_completion_transformation/transformation.py
@@ -0,0 +1,631 @@
+"""
+Handles transforming from Responses API -> LiteLLM completion (Chat Completion API)
+"""
+
+from typing import Any, Dict, List, Optional, Union
+
+from openai.types.responses.tool_param import FunctionToolParam
+
+from litellm.caching import InMemoryCache
+from litellm.responses.litellm_completion_transformation.session_handler import (
+ ResponsesAPISessionElement,
+ SessionHandler,
+)
+from litellm.types.llms.openai import (
+ AllMessageValues,
+ ChatCompletionResponseMessage,
+ ChatCompletionSystemMessage,
+ ChatCompletionToolCallChunk,
+ ChatCompletionToolCallFunctionChunk,
+ ChatCompletionToolMessage,
+ ChatCompletionToolParam,
+ ChatCompletionToolParamFunctionChunk,
+ ChatCompletionUserMessage,
+ GenericChatCompletionMessage,
+ Reasoning,
+ ResponseAPIUsage,
+ ResponseInputParam,
+ ResponsesAPIOptionalRequestParams,
+ ResponsesAPIResponse,
+ ResponseTextConfig,
+)
+from litellm.types.responses.main import (
+ GenericResponseOutputItem,
+ GenericResponseOutputItemContentAnnotation,
+ OutputFunctionToolCall,
+ OutputText,
+)
+from litellm.types.utils import (
+ ChatCompletionAnnotation,
+ ChatCompletionMessageToolCall,
+ Choices,
+ Function,
+ Message,
+ ModelResponse,
+ Usage,
+)
+
+########### Initialize Classes used for Responses API ###########
+TOOL_CALLS_CACHE = InMemoryCache()
+RESPONSES_API_SESSION_HANDLER = SessionHandler()
+########### End of Initialize Classes used for Responses API ###########
+
+
+class LiteLLMCompletionResponsesConfig:
+
+ @staticmethod
+ def transform_responses_api_request_to_chat_completion_request(
+ model: str,
+ input: Union[str, ResponseInputParam],
+ responses_api_request: ResponsesAPIOptionalRequestParams,
+ custom_llm_provider: Optional[str] = None,
+ stream: Optional[bool] = None,
+ **kwargs,
+ ) -> dict:
+ """
+ Transform a Responses API request into a Chat Completion request
+ """
+ litellm_completion_request: dict = {
+ "messages": LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
+ input=input,
+ responses_api_request=responses_api_request,
+ previous_response_id=responses_api_request.get("previous_response_id"),
+ ),
+ "model": model,
+ "tool_choice": responses_api_request.get("tool_choice"),
+ "tools": LiteLLMCompletionResponsesConfig.transform_responses_api_tools_to_chat_completion_tools(
+ responses_api_request.get("tools") or [] # type: ignore
+ ),
+ "top_p": responses_api_request.get("top_p"),
+ "user": responses_api_request.get("user"),
+ "temperature": responses_api_request.get("temperature"),
+ "parallel_tool_calls": responses_api_request.get("parallel_tool_calls"),
+ "max_tokens": responses_api_request.get("max_output_tokens"),
+ "stream": stream,
+ "metadata": kwargs.get("metadata"),
+ "service_tier": kwargs.get("service_tier"),
+ # litellm specific params
+ "custom_llm_provider": custom_llm_provider,
+ }
+
+ # only pass non-None values
+ litellm_completion_request = {
+ k: v for k, v in litellm_completion_request.items() if v is not None
+ }
+
+ return litellm_completion_request
+
+ @staticmethod
+ def transform_responses_api_input_to_messages(
+ input: Union[str, ResponseInputParam],
+ responses_api_request: ResponsesAPIOptionalRequestParams,
+ previous_response_id: Optional[str] = None,
+ ) -> List[
+ Union[
+ AllMessageValues,
+ GenericChatCompletionMessage,
+ ChatCompletionMessageToolCall,
+ ChatCompletionResponseMessage,
+ ]
+ ]:
+ """
+ Transform a Responses API input into a list of messages
+ """
+ messages: List[
+ Union[
+ AllMessageValues,
+ GenericChatCompletionMessage,
+ ChatCompletionMessageToolCall,
+ ChatCompletionResponseMessage,
+ ]
+ ] = []
+ if responses_api_request.get("instructions"):
+ messages.append(
+ LiteLLMCompletionResponsesConfig.transform_instructions_to_system_message(
+ responses_api_request.get("instructions")
+ )
+ )
+
+ if previous_response_id:
+ previous_response_pairs = (
+ RESPONSES_API_SESSION_HANDLER.get_chain_of_previous_input_output_pairs(
+ previous_response_id=previous_response_id
+ )
+ )
+ if previous_response_pairs:
+ for previous_response_pair in previous_response_pairs:
+ chat_completion_input_messages = LiteLLMCompletionResponsesConfig._transform_response_input_param_to_chat_completion_message(
+ input=previous_response_pair[0],
+ )
+ chat_completion_output_messages = LiteLLMCompletionResponsesConfig._transform_responses_api_outputs_to_chat_completion_messages(
+ responses_api_output=previous_response_pair[1],
+ )
+
+ messages.extend(chat_completion_input_messages)
+ messages.extend(chat_completion_output_messages)
+
+ messages.extend(
+ LiteLLMCompletionResponsesConfig._transform_response_input_param_to_chat_completion_message(
+ input=input,
+ )
+ )
+
+ return messages
+
+ @staticmethod
+ def _transform_response_input_param_to_chat_completion_message(
+ input: Union[str, ResponseInputParam],
+ ) -> List[
+ Union[
+ AllMessageValues,
+ GenericChatCompletionMessage,
+ ChatCompletionMessageToolCall,
+ ChatCompletionResponseMessage,
+ ]
+ ]:
+ """
+ Transform a ResponseInputParam into a Chat Completion message
+ """
+ messages: List[
+ Union[
+ AllMessageValues,
+ GenericChatCompletionMessage,
+ ChatCompletionMessageToolCall,
+ ChatCompletionResponseMessage,
+ ]
+ ] = []
+ tool_call_output_messages: List[
+ Union[
+ AllMessageValues,
+ GenericChatCompletionMessage,
+ ChatCompletionMessageToolCall,
+ ChatCompletionResponseMessage,
+ ]
+ ] = []
+
+ if isinstance(input, str):
+ messages.append(ChatCompletionUserMessage(role="user", content=input))
+ elif isinstance(input, list):
+ for _input in input:
+ chat_completion_messages = LiteLLMCompletionResponsesConfig._transform_responses_api_input_item_to_chat_completion_message(
+ input_item=_input
+ )
+ if LiteLLMCompletionResponsesConfig._is_input_item_tool_call_output(
+ input_item=_input
+ ):
+ tool_call_output_messages.extend(chat_completion_messages)
+ else:
+ messages.extend(chat_completion_messages)
+
+ messages.extend(tool_call_output_messages)
+ return messages
+
+ @staticmethod
+ def _ensure_tool_call_output_has_corresponding_tool_call(
+ messages: List[Union[AllMessageValues, GenericChatCompletionMessage]],
+ ) -> bool:
+ """
+ If any tool call output is present, ensure there is a corresponding tool call/tool_use block
+ """
+ for message in messages:
+ if message.get("role") == "tool":
+ return True
+ return False
+
+ @staticmethod
+ def _transform_responses_api_input_item_to_chat_completion_message(
+ input_item: Any,
+ ) -> List[
+ Union[
+ AllMessageValues,
+ GenericChatCompletionMessage,
+ ChatCompletionResponseMessage,
+ ]
+ ]:
+ """
+ Transform a Responses API input item into a Chat Completion message
+
+ - EasyInputMessageParam
+ - Message
+ - ResponseOutputMessageParam
+ - ResponseFileSearchToolCallParam
+ - ResponseComputerToolCallParam
+ - ComputerCallOutput
+ - ResponseFunctionWebSearchParam
+ - ResponseFunctionToolCallParam
+ - FunctionCallOutput
+ - ResponseReasoningItemParam
+ - ItemReference
+ """
+ if LiteLLMCompletionResponsesConfig._is_input_item_tool_call_output(input_item):
+ # handle executed tool call results
+ return LiteLLMCompletionResponsesConfig._transform_responses_api_tool_call_output_to_chat_completion_message(
+ tool_call_output=input_item
+ )
+ else:
+ return [
+ GenericChatCompletionMessage(
+ role=input_item.get("role") or "user",
+ content=LiteLLMCompletionResponsesConfig._transform_responses_api_content_to_chat_completion_content(
+ input_item.get("content")
+ ),
+ )
+ ]
+
+ @staticmethod
+ def _is_input_item_tool_call_output(input_item: Any) -> bool:
+ """
+ Check if the input item is a tool call output
+ """
+ return input_item.get("type") in [
+ "function_call_output",
+ "web_search_call",
+ "computer_call_output",
+ ]
+
+ @staticmethod
+ def _transform_responses_api_tool_call_output_to_chat_completion_message(
+ tool_call_output: Dict[str, Any],
+ ) -> List[
+ Union[
+ AllMessageValues,
+ GenericChatCompletionMessage,
+ ChatCompletionResponseMessage,
+ ]
+ ]:
+ """
+ ChatCompletionToolMessage is used to indicate the output from a tool call
+ """
+ tool_output_message = ChatCompletionToolMessage(
+ role="tool",
+ content=tool_call_output.get("output") or "",
+ tool_call_id=tool_call_output.get("call_id") or "",
+ )
+
+ _tool_use_definition = TOOL_CALLS_CACHE.get_cache(
+ key=tool_call_output.get("call_id") or "",
+ )
+ if _tool_use_definition:
+ """
+ Append the tool use definition to the list of messages
+
+
+ Providers like Anthropic require the tool use definition to be included with the tool output
+
+ - Input:
+ {'function':
+                arguments:'{"command": ["echo","\\n \\n Hello\\n\\n\\n Hi \\n\\n",">","index.html"]}',
+ name='shell',
+ 'id': 'toolu_018KFWsEySHjdKZPdUzXpymJ',
+ 'type': 'function'
+ }
+ - Output:
+ {
+ "id": "toolu_018KFWsEySHjdKZPdUzXpymJ",
+ "type": "function",
+ "function": {
+ "name": "get_weather",
+ "arguments": "{\"latitude\":48.8566,\"longitude\":2.3522}"
+ }
+ }
+
+ """
+ function: dict = _tool_use_definition.get("function") or {}
+ tool_call_chunk = ChatCompletionToolCallChunk(
+ id=_tool_use_definition.get("id") or "",
+ type=_tool_use_definition.get("type") or "function",
+ function=ChatCompletionToolCallFunctionChunk(
+ name=function.get("name") or "",
+ arguments=function.get("arguments") or "",
+ ),
+ index=0,
+ )
+ chat_completion_response_message = ChatCompletionResponseMessage(
+ tool_calls=[tool_call_chunk],
+ role="assistant",
+ )
+ return [chat_completion_response_message, tool_output_message]
+
+ return [tool_output_message]
+
+ @staticmethod
+ def _transform_responses_api_content_to_chat_completion_content(
+ content: Any,
+ ) -> Union[str, List[Union[str, Dict[str, Any]]]]:
+ """
+        Transform Responses API content into Chat Completion content
+ """
+
+ if isinstance(content, str):
+ return content
+ elif isinstance(content, list):
+ content_list: List[Union[str, Dict[str, Any]]] = []
+ for item in content:
+ if isinstance(item, str):
+ content_list.append(item)
+ elif isinstance(item, dict):
+ content_list.append(
+ {
+ "type": LiteLLMCompletionResponsesConfig._get_chat_completion_request_content_type(
+ item.get("type") or "text"
+ ),
+ "text": item.get("text"),
+ }
+ )
+ return content_list
+ else:
+ raise ValueError(f"Invalid content type: {type(content)}")
+
+ @staticmethod
+ def _get_chat_completion_request_content_type(content_type: str) -> str:
+ """
+ Get the Chat Completion request content type
+ """
+ # Responses API content has `input_` prefix, if it exists, remove it
+ if content_type.startswith("input_"):
+ return content_type[len("input_") :]
+ else:
+ return content_type
+
+ @staticmethod
+ def transform_instructions_to_system_message(
+ instructions: Optional[str],
+ ) -> ChatCompletionSystemMessage:
+ """
+        Transform an instructions string into a system message
+ """
+ return ChatCompletionSystemMessage(role="system", content=instructions or "")
+
+ @staticmethod
+ def transform_responses_api_tools_to_chat_completion_tools(
+ tools: Optional[List[FunctionToolParam]],
+ ) -> List[ChatCompletionToolParam]:
+ """
+        Transform Responses API tools into Chat Completion tools
+ """
+ if tools is None:
+ return []
+ chat_completion_tools: List[ChatCompletionToolParam] = []
+ for tool in tools:
+ chat_completion_tools.append(
+ ChatCompletionToolParam(
+ type="function",
+ function=ChatCompletionToolParamFunctionChunk(
+ name=tool["name"],
+ description=tool.get("description") or "",
+ parameters=tool.get("parameters", {}),
+ strict=tool.get("strict", False),
+ ),
+ )
+ )
+ return chat_completion_tools
+
+ @staticmethod
+ def transform_chat_completion_tools_to_responses_tools(
+ chat_completion_response: ModelResponse,
+ ) -> List[OutputFunctionToolCall]:
+ """
+        Transform Chat Completion tool calls into Responses API tool calls
+ """
+ all_chat_completion_tools: List[ChatCompletionMessageToolCall] = []
+ for choice in chat_completion_response.choices:
+ if isinstance(choice, Choices):
+ if choice.message.tool_calls:
+ all_chat_completion_tools.extend(choice.message.tool_calls)
+ for tool_call in choice.message.tool_calls:
+ TOOL_CALLS_CACHE.set_cache(
+ key=tool_call.id,
+ value=tool_call,
+ )
+
+ responses_tools: List[OutputFunctionToolCall] = []
+ for tool in all_chat_completion_tools:
+ if tool.type == "function":
+ function_definition = tool.function
+ responses_tools.append(
+ OutputFunctionToolCall(
+ name=function_definition.name or "",
+ arguments=function_definition.get("arguments") or "",
+ call_id=tool.id or "",
+ id=tool.id or "",
+ type="function_call", # critical this is "function_call" to work with tools like openai codex
+ status=function_definition.get("status") or "completed",
+ )
+ )
+ return responses_tools
+
+ @staticmethod
+ def transform_chat_completion_response_to_responses_api_response(
+ request_input: Union[str, ResponseInputParam],
+ responses_api_request: ResponsesAPIOptionalRequestParams,
+ chat_completion_response: ModelResponse,
+ ) -> ResponsesAPIResponse:
+ """
+ Transform a Chat Completion response into a Responses API response
+ """
+ responses_api_response: ResponsesAPIResponse = ResponsesAPIResponse(
+ id=chat_completion_response.id,
+ created_at=chat_completion_response.created,
+ model=chat_completion_response.model,
+ object=chat_completion_response.object,
+ error=getattr(chat_completion_response, "error", None),
+ incomplete_details=getattr(
+ chat_completion_response, "incomplete_details", None
+ ),
+ instructions=getattr(chat_completion_response, "instructions", None),
+ metadata=getattr(chat_completion_response, "metadata", {}),
+ output=LiteLLMCompletionResponsesConfig._transform_chat_completion_choices_to_responses_output(
+ chat_completion_response=chat_completion_response,
+ choices=getattr(chat_completion_response, "choices", []),
+ ),
+ parallel_tool_calls=getattr(
+ chat_completion_response, "parallel_tool_calls", False
+ ),
+ temperature=getattr(chat_completion_response, "temperature", 0),
+ tool_choice=getattr(chat_completion_response, "tool_choice", "auto"),
+ tools=getattr(chat_completion_response, "tools", []),
+ top_p=getattr(chat_completion_response, "top_p", None),
+ max_output_tokens=getattr(
+ chat_completion_response, "max_output_tokens", None
+ ),
+ previous_response_id=getattr(
+ chat_completion_response, "previous_response_id", None
+ ),
+ reasoning=Reasoning(),
+ status=getattr(chat_completion_response, "status", "completed"),
+ text=ResponseTextConfig(),
+ truncation=getattr(chat_completion_response, "truncation", None),
+ usage=LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
+ chat_completion_response=chat_completion_response
+ ),
+ user=getattr(chat_completion_response, "user", None),
+ )
+
+ RESPONSES_API_SESSION_HANDLER.add_completed_response_to_cache(
+ response_id=responses_api_response.id,
+ session_element=ResponsesAPISessionElement(
+ input=request_input,
+ output=responses_api_response,
+ response_id=responses_api_response.id,
+ previous_response_id=responses_api_request.get("previous_response_id"),
+ ),
+ )
+ return responses_api_response
+
+ @staticmethod
+ def _transform_chat_completion_choices_to_responses_output(
+ chat_completion_response: ModelResponse,
+ choices: List[Choices],
+ ) -> List[Union[GenericResponseOutputItem, OutputFunctionToolCall]]:
+ responses_output: List[
+ Union[GenericResponseOutputItem, OutputFunctionToolCall]
+ ] = []
+ for choice in choices:
+ responses_output.append(
+ GenericResponseOutputItem(
+ type="message",
+ id=chat_completion_response.id,
+ status=choice.finish_reason,
+ role=choice.message.role,
+ content=[
+ LiteLLMCompletionResponsesConfig._transform_chat_message_to_response_output_text(
+ choice.message
+ )
+ ],
+ )
+ )
+
+ tool_calls = LiteLLMCompletionResponsesConfig.transform_chat_completion_tools_to_responses_tools(
+ chat_completion_response=chat_completion_response
+ )
+ responses_output.extend(tool_calls)
+ return responses_output
+
+ @staticmethod
+ def _transform_responses_api_outputs_to_chat_completion_messages(
+ responses_api_output: ResponsesAPIResponse,
+ ) -> List[
+ Union[
+ AllMessageValues,
+ GenericChatCompletionMessage,
+ ChatCompletionMessageToolCall,
+ ]
+ ]:
+ messages: List[
+ Union[
+ AllMessageValues,
+ GenericChatCompletionMessage,
+ ChatCompletionMessageToolCall,
+ ]
+ ] = []
+ output_items = responses_api_output.output
+ for _output_item in output_items:
+ output_item: dict = dict(_output_item)
+ if output_item.get("type") == "function_call":
+ # handle function call output
+ messages.append(
+ LiteLLMCompletionResponsesConfig._transform_responses_output_tool_call_to_chat_completion_output_tool_call(
+ tool_call=output_item
+ )
+ )
+ else:
+ # transform as generic ResponseOutputItem
+ messages.append(
+ GenericChatCompletionMessage(
+                        role=str(output_item.get("role") or "user"),
+ content=LiteLLMCompletionResponsesConfig._transform_responses_api_content_to_chat_completion_content(
+ output_item.get("content")
+ ),
+ )
+ )
+ return messages
+
+ @staticmethod
+ def _transform_responses_output_tool_call_to_chat_completion_output_tool_call(
+ tool_call: dict,
+ ) -> ChatCompletionMessageToolCall:
+ return ChatCompletionMessageToolCall(
+ id=tool_call.get("id") or "",
+ type="function",
+ function=Function(
+ name=tool_call.get("name") or "",
+ arguments=tool_call.get("arguments") or "",
+ ),
+ )
+
+ @staticmethod
+ def _transform_chat_message_to_response_output_text(
+ message: Message,
+ ) -> OutputText:
+ return OutputText(
+ type="output_text",
+ text=message.content,
+ annotations=LiteLLMCompletionResponsesConfig._transform_chat_completion_annotations_to_response_output_annotations(
+ annotations=getattr(message, "annotations", None)
+ ),
+ )
+
+ @staticmethod
+ def _transform_chat_completion_annotations_to_response_output_annotations(
+ annotations: Optional[List[ChatCompletionAnnotation]],
+ ) -> List[GenericResponseOutputItemContentAnnotation]:
+ response_output_annotations: List[
+ GenericResponseOutputItemContentAnnotation
+ ] = []
+
+ if annotations is None:
+ return response_output_annotations
+
+ for annotation in annotations:
+ annotation_type = annotation.get("type")
+ if annotation_type == "url_citation" and "url_citation" in annotation:
+ url_citation = annotation["url_citation"]
+ response_output_annotations.append(
+ GenericResponseOutputItemContentAnnotation(
+ type=annotation_type,
+ start_index=url_citation.get("start_index"),
+ end_index=url_citation.get("end_index"),
+ url=url_citation.get("url"),
+ title=url_citation.get("title"),
+ )
+ )
+ # Handle other annotation types here
+
+ return response_output_annotations
+
+ @staticmethod
+ def _transform_chat_completion_usage_to_responses_usage(
+ chat_completion_response: ModelResponse,
+ ) -> ResponseAPIUsage:
+ usage: Optional[Usage] = getattr(chat_completion_response, "usage", None)
+ if usage is None:
+ return ResponseAPIUsage(
+ input_tokens=0,
+ output_tokens=0,
+ total_tokens=0,
+ )
+ return ResponseAPIUsage(
+ input_tokens=usage.prompt_tokens,
+ output_tokens=usage.completion_tokens,
+ total_tokens=usage.total_tokens,
+ )
diff --git a/litellm/responses/main.py b/litellm/responses/main.py
index 70b651f376..e844d86716 100644
--- a/litellm/responses/main.py
+++ b/litellm/responses/main.py
@@ -10,6 +10,9 @@ from litellm.constants import request_timeout
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
+from litellm.responses.litellm_completion_transformation.handler import (
+ LiteLLMCompletionTransformationHandler,
+)
from litellm.responses.utils import ResponsesAPIRequestUtils
from litellm.types.llms.openai import (
Reasoning,
@@ -29,6 +32,7 @@ from .streaming_iterator import BaseResponsesAPIStreamingIterator
####### ENVIRONMENT VARIABLES ###################
# Initialize any necessary instances or variables here
base_llm_http_handler = BaseLLMHTTPHandler()
+litellm_completion_transformation_handler = LiteLLMCompletionTransformationHandler()
#################################################
@@ -178,19 +182,12 @@ def responses(
)
# get provider config
- responses_api_provider_config: Optional[
- BaseResponsesAPIConfig
- ] = ProviderConfigManager.get_provider_responses_api_config(
- model=model,
- provider=litellm.LlmProviders(custom_llm_provider),
- )
-
- if responses_api_provider_config is None:
- raise litellm.BadRequestError(
+ responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
+ ProviderConfigManager.get_provider_responses_api_config(
model=model,
- llm_provider=custom_llm_provider,
- message=f"Responses API not available for custom_llm_provider={custom_llm_provider}, model: {model}",
+ provider=litellm.LlmProviders(custom_llm_provider),
)
+ )
local_vars.update(kwargs)
# Get ResponsesAPIOptionalRequestParams with only valid parameters
@@ -200,6 +197,17 @@ def responses(
)
)
+ if responses_api_provider_config is None:
+ return litellm_completion_transformation_handler.response_api_handler(
+ model=model,
+ input=input,
+ responses_api_request=response_api_optional_params,
+ custom_llm_provider=custom_llm_provider,
+ _is_async=_is_async,
+ stream=stream,
+ **kwargs,
+ )
+
# Get optional parameters for the responses API
responses_api_request_params: Dict = (
ResponsesAPIRequestUtils.get_optional_params_responses_api(
diff --git a/litellm/types/llms/base.py b/litellm/types/llms/base.py
new file mode 100644
index 0000000000..aec1438c48
--- /dev/null
+++ b/litellm/types/llms/base.py
@@ -0,0 +1,15 @@
+from pydantic import BaseModel
+
+
+class BaseLiteLLMOpenAIResponseObject(BaseModel):
+ def __getitem__(self, key):
+ return self.__dict__[key]
+
+ def get(self, key, default=None):
+ return self.__dict__.get(key, default)
+
+ def __contains__(self, key):
+ return key in self.__dict__
+
+ def items(self):
+ return self.__dict__.items()
diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py
index 0cb05a710f..10766b65a6 100644
--- a/litellm/types/llms/openai.py
+++ b/litellm/types/llms/openai.py
@@ -49,9 +49,16 @@ from openai.types.responses.response_create_params import (
ToolChoice,
ToolParam,
)
+from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall
from pydantic import BaseModel, Discriminator, Field, PrivateAttr
from typing_extensions import Annotated, Dict, Required, TypedDict, override
+from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject
+from litellm.types.responses.main import (
+ GenericResponseOutputItem,
+ OutputFunctionToolCall,
+)
+
FileContent = Union[IO[bytes], bytes, PathLike]
FileTypes = Union[
@@ -678,6 +685,11 @@ class ChatCompletionDeveloperMessage(OpenAIChatCompletionDeveloperMessage, total
cache_control: ChatCompletionCachedContent
+class GenericChatCompletionMessage(TypedDict, total=False):
+ role: Required[str]
+ content: Required[Union[str, List]]
+
+
ValidUserMessageContentTypes = [
"text",
"image_url",
@@ -803,12 +815,12 @@ class OpenAIChatCompletionChunk(ChatCompletionChunk):
class Hyperparameters(BaseModel):
batch_size: Optional[Union[str, int]] = None # "Number of examples in each batch."
- learning_rate_multiplier: Optional[
- Union[str, float]
- ] = None # Scaling factor for the learning rate
- n_epochs: Optional[
- Union[str, int]
- ] = None # "The number of epochs to train the model for"
+ learning_rate_multiplier: Optional[Union[str, float]] = (
+ None # Scaling factor for the learning rate
+ )
+ n_epochs: Optional[Union[str, int]] = (
+ None # "The number of epochs to train the model for"
+ )
class FineTuningJobCreate(BaseModel):
@@ -835,18 +847,18 @@ class FineTuningJobCreate(BaseModel):
model: str # "The name of the model to fine-tune."
training_file: str # "The ID of an uploaded file that contains training data."
- hyperparameters: Optional[
- Hyperparameters
- ] = None # "The hyperparameters used for the fine-tuning job."
- suffix: Optional[
- str
- ] = None # "A string of up to 18 characters that will be added to your fine-tuned model name."
- validation_file: Optional[
- str
- ] = None # "The ID of an uploaded file that contains validation data."
- integrations: Optional[
- List[str]
- ] = None # "A list of integrations to enable for your fine-tuning job."
+ hyperparameters: Optional[Hyperparameters] = (
+ None # "The hyperparameters used for the fine-tuning job."
+ )
+ suffix: Optional[str] = (
+ None # "A string of up to 18 characters that will be added to your fine-tuned model name."
+ )
+ validation_file: Optional[str] = (
+ None # "The ID of an uploaded file that contains validation data."
+ )
+ integrations: Optional[List[str]] = (
+ None # "A list of integrations to enable for your fine-tuning job."
+ )
seed: Optional[int] = None # "The seed controls the reproducibility of the job."
@@ -887,7 +899,7 @@ class ResponsesAPIOptionalRequestParams(TypedDict, total=False):
temperature: Optional[float]
text: Optional[ResponseTextConfigParam]
tool_choice: Optional[ToolChoice]
- tools: Optional[Iterable[ToolParam]]
+ tools: Optional[List[ToolParam]]
top_p: Optional[float]
truncation: Optional[Literal["auto", "disabled"]]
user: Optional[str]
@@ -900,20 +912,6 @@ class ResponsesAPIRequestParams(ResponsesAPIOptionalRequestParams, total=False):
model: str
-class BaseLiteLLMOpenAIResponseObject(BaseModel):
- def __getitem__(self, key):
- return self.__dict__[key]
-
- def get(self, key, default=None):
- return self.__dict__.get(key, default)
-
- def __contains__(self, key):
- return key in self.__dict__
-
- def items(self):
- return self.__dict__.items()
-
-
class OutputTokensDetails(BaseLiteLLMOpenAIResponseObject):
reasoning_tokens: Optional[int] = None
@@ -958,11 +956,14 @@ class ResponsesAPIResponse(BaseLiteLLMOpenAIResponseObject):
metadata: Optional[Dict]
model: Optional[str]
object: Optional[str]
- output: List[ResponseOutputItem]
+ output: Union[
+ List[ResponseOutputItem],
+ List[Union[GenericResponseOutputItem, OutputFunctionToolCall]],
+ ]
parallel_tool_calls: bool
temperature: Optional[float]
tool_choice: ToolChoice
- tools: List[Tool]
+ tools: Union[List[Tool], List[ResponseFunctionToolCall]]
top_p: Optional[float]
max_output_tokens: Optional[int]
previous_response_id: Optional[str]
diff --git a/litellm/types/responses/main.py b/litellm/types/responses/main.py
new file mode 100644
index 0000000000..63a548bbfd
--- /dev/null
+++ b/litellm/types/responses/main.py
@@ -0,0 +1,48 @@
+from typing import Literal
+
+from typing_extensions import Any, List, Optional, TypedDict
+
+from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject
+
+
+class GenericResponseOutputItemContentAnnotation(BaseLiteLLMOpenAIResponseObject):
+ """Annotation for content in a message"""
+
+ type: Optional[str]
+ start_index: Optional[int]
+ end_index: Optional[int]
+ url: Optional[str]
+ title: Optional[str]
+
+
+class OutputText(BaseLiteLLMOpenAIResponseObject):
+ """Text output content from an assistant message"""
+
+ type: Optional[str] # "output_text"
+ text: Optional[str]
+ annotations: Optional[List[GenericResponseOutputItemContentAnnotation]]
+
+
+class OutputFunctionToolCall(BaseLiteLLMOpenAIResponseObject):
+ """A tool call to run a function"""
+
+ arguments: Optional[str]
+ call_id: Optional[str]
+ name: Optional[str]
+ type: Optional[str] # "function_call"
+ id: Optional[str]
+ status: Literal["in_progress", "completed", "incomplete"]
+
+
+class GenericResponseOutputItem(BaseLiteLLMOpenAIResponseObject):
+ """
+ Generic response API output item
+
+ """
+
+ type: str # "message"
+ id: str
+ status: str # "completed", "in_progress", etc.
+ role: str # "assistant", "user", etc.
+ content: List[OutputText]
diff --git a/tests/llm_responses_api_testing/base_responses_api.py b/tests/llm_responses_api_testing/base_responses_api.py
index 356fe5e78e..884d9bda7b 100644
--- a/tests/llm_responses_api_testing/base_responses_api.py
+++ b/tests/llm_responses_api_testing/base_responses_api.py
@@ -68,16 +68,16 @@ def validate_responses_api_response(response, final_chunk: bool = False):
"metadata": dict,
"model": str,
"object": str,
- "temperature": (int, float),
+ "temperature": (int, float, type(None)),
"tool_choice": (dict, str),
"tools": list,
- "top_p": (int, float),
+ "top_p": (int, float, type(None)),
"max_output_tokens": (int, type(None)),
"previous_response_id": (str, type(None)),
"reasoning": dict,
"status": str,
"text": ResponseTextConfig,
- "truncation": str,
+ "truncation": (str, type(None)),
"usage": ResponseAPIUsage,
"user": (str, type(None)),
}
diff --git a/tests/llm_responses_api_testing/test_anthropic_responses_api.py b/tests/llm_responses_api_testing/test_anthropic_responses_api.py
new file mode 100644
index 0000000000..0fcb771f73
--- /dev/null
+++ b/tests/llm_responses_api_testing/test_anthropic_responses_api.py
@@ -0,0 +1,95 @@
+import os
+import sys
+import pytest
+import asyncio
+from typing import Optional
+from unittest.mock import patch, AsyncMock
+
+sys.path.insert(0, os.path.abspath("../.."))
+import litellm
+from litellm.integrations.custom_logger import CustomLogger
+import json
+from litellm.types.utils import StandardLoggingPayload
+from litellm.types.llms.openai import (
+ ResponseCompletedEvent,
+ ResponsesAPIResponse,
+ ResponseTextConfig,
+ ResponseAPIUsage,
+ IncompleteDetails,
+)
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+from base_responses_api import BaseResponsesAPITest
+from openai.types.responses.function_tool import FunctionTool
+
+
+class TestAnthropicResponsesAPITest(BaseResponsesAPITest):
+ def get_base_completion_call_args(self):
+ #litellm._turn_on_debug()
+ return {
+ "model": "anthropic/claude-3-5-sonnet-latest",
+ }
+
+
+def test_multiturn_tool_calls():
+    # Test multi-turn tool calling with Anthropic via the Responses API bridge
+ litellm._turn_on_debug()
+ shell_tool = dict(FunctionTool(
+ type="function",
+ name="shell",
+ description="Runs a shell command, and returns its output.",
+ parameters={
+ "type": "object",
+ "properties": {
+ "command": {"type": "array", "items": {"type": "string"}},
+ "workdir": {"type": "string", "description": "The working directory for the command."}
+ },
+ "required": ["command"]
+ },
+ strict=True
+ ))
+
+
+
+ # Step 1: Initial request with the tool
+ response = litellm.responses(
+ input=[{
+ 'role': 'user',
+ 'content': [
+ {'type': 'input_text', 'text': 'make a hello world html file'}
+ ],
+ 'type': 'message'
+ }],
+ model='anthropic/claude-3-7-sonnet-latest',
+ instructions='You are a helpful coding assistant.',
+ tools=[shell_tool]
+ )
+
+ print("response=", response)
+
+ # Step 2: Send the results of the tool call back to the model
+ # Get the response ID and tool call ID from the response
+
+ response_id = response.id
+ tool_call_id = ""
+ for item in response.output:
+ if 'type' in item and item['type'] == 'function_call':
+ tool_call_id = item['call_id']
+ break
+
+    # Send the tool call output back to the model, referencing the previous response
+ follow_up_response = litellm.responses(
+ model='anthropic/claude-3-7-sonnet-latest',
+ previous_response_id=response_id,
+ input=[{
+ 'type': 'function_call_output',
+ 'call_id': tool_call_id,
+            'output': '{"output":"\\n\\n Hello Page\\n\\n\\n Hi \\n Welcome to this simple webpage! \\n\\n > index.html\\n","metadata":{"exit_code":0,"duration_seconds":0}}'
+ }],
+ tools=[shell_tool]
+ )
+
+ print("follow_up_response=", follow_up_response)
+
+
+
\ No newline at end of file