diff --git a/docs/my-website/docs/tutorials/openai_codex.md b/docs/my-website/docs/tutorials/openai_codex.md new file mode 100644 index 0000000000..941f197b95 --- /dev/null +++ b/docs/my-website/docs/tutorials/openai_codex.md @@ -0,0 +1,139 @@ +import Image from '@theme/IdealImage'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Using LiteLLM with OpenAI Codex + +This guide walks you through connecting OpenAI Codex to LiteLLM. Using LiteLLM with Codex allows teams to: +- Access 100+ LLMs through the Codex interface +- Use powerful models like Gemini through a familiar interface +- Track spend and usage with LiteLLM's built-in analytics +- Control model access with virtual keys + + + +## Quickstart + +Make sure to set up LiteLLM with the [LiteLLM Getting Started Guide](../proxy/docker_quick_start.md). + +## 1. Install OpenAI Codex + +Install the OpenAI Codex CLI tool globally using npm: + + + + +```bash showLineNumbers +npm i -g @openai/codex +``` + + + + +```bash showLineNumbers +yarn global add @openai/codex +``` + + + + +## 2. Start LiteLLM Proxy + + + + +```bash showLineNumbers +docker run \ + -v $(pwd)/litellm_config.yaml:/app/config.yaml \ + -p 4000:4000 \ + ghcr.io/berriai/litellm:main-latest \ + --config /app/config.yaml +``` + + + + +```bash showLineNumbers +litellm --config /path/to/config.yaml +``` + + + + +LiteLLM should now be running on [http://localhost:4000](http://localhost:4000) + +## 3. Configure LiteLLM for Model Routing + +Ensure your LiteLLM Proxy is properly configured to route to your desired models. Create a `litellm_config.yaml` file with the following content: + +```yaml showLineNumbers +model_list: + - model_name: o3-mini + litellm_params: + model: openai/o3-mini + api_key: os.environ/OPENAI_API_KEY + - model_name: claude-3-7-sonnet-latest + litellm_params: + model: anthropic/claude-3-7-sonnet-latest + api_key: os.environ/ANTHROPIC_API_KEY + - model_name: gemini-2.0-flash + litellm_params: + model: gemini/gemini-2.0-flash + api_key: os.environ/GEMINI_API_KEY + +litellm_settings: + drop_params: true +``` + +This configuration enables routing to specific OpenAI, Anthropic, and Gemini models with explicit names. + +## 4. Configure Codex to Use LiteLLM Proxy + +Set the required environment variables to point Codex to your LiteLLM Proxy: + +```bash +# Point to your LiteLLM Proxy server +export OPENAI_BASE_URL=http://0.0.0.0:4000 + +# Use your LiteLLM API key (if you've set up authentication) +export OPENAI_API_KEY="sk-1234" +``` + +## 5. Run Codex with Gemini + +With everything configured, you can now run Codex with Gemini: + +```bash showLineNumbers +codex --model gemini-flash --full-auto +``` + + + +The `--full-auto` flag allows Codex to automatically generate code without additional prompting. + +## 6. 
Advanced Options + +### Using Different Models + +You can use any model configured in your LiteLLM proxy: + +```bash +# Use Claude models +codex --model claude-3-7-sonnet-latest + +# Use Google AI Studio Gemini models +codex --model gemini/gemini-2.0-flash +``` + +## Troubleshooting + +- If you encounter connection issues, ensure your LiteLLM Proxy is running and accessible at the specified URL +- Verify your LiteLLM API key is valid if you're using authentication +- Check that your model routing configuration is correct +- For model-specific errors, ensure the model is properly configured in your LiteLLM setup + +## Additional Resources + +- [LiteLLM Docker Quick Start Guide](../proxy/docker_quick_start.md) +- [OpenAI Codex GitHub Repository](https://github.com/openai/codex) +- [LiteLLM Virtual Keys and Authentication](../proxy/virtual_keys.md) diff --git a/docs/my-website/img/litellm_codex.gif b/docs/my-website/img/litellm_codex.gif new file mode 100644 index 0000000000..04332b5053 Binary files /dev/null and b/docs/my-website/img/litellm_codex.gif differ diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index fdf2019cc2..bc9182305a 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -443,6 +443,7 @@ const sidebars = { label: "Tutorials", items: [ "tutorials/openweb_ui", + "tutorials/openai_codex", "tutorials/msft_sso", "tutorials/prompt_caching", "tutorials/tag_management", diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 17658df903..d15cd9383e 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -1,6 +1,13 @@ model_list: - - model_name: fake-openai-endpoint + - model_name: openai/* litellm_params: - model: openai/fake - api_key: fake-key - api_base: https://exampleopenaiendpoint-production.up.railway.app/ + model: openai/* + - model_name: anthropic/* + litellm_params: + model: anthropic/* + - model_name: gemini/* + litellm_params: + model: gemini/* +litellm_settings: + drop_params: true + diff --git a/litellm/responses/litellm_completion_transformation/handler.py b/litellm/responses/litellm_completion_transformation/handler.py new file mode 100644 index 0000000000..3580fe5e44 --- /dev/null +++ b/litellm/responses/litellm_completion_transformation/handler.py @@ -0,0 +1,115 @@ +""" +Handler for transforming responses api requests to litellm.completion requests +""" + +from typing import Any, Coroutine, Optional, Union + +import litellm +from litellm.responses.litellm_completion_transformation.streaming_iterator import ( + LiteLLMCompletionStreamingIterator, +) +from litellm.responses.litellm_completion_transformation.transformation import ( + LiteLLMCompletionResponsesConfig, +) +from litellm.responses.streaming_iterator import BaseResponsesAPIStreamingIterator +from litellm.types.llms.openai import ( + ResponseInputParam, + ResponsesAPIOptionalRequestParams, + ResponsesAPIResponse, +) +from litellm.types.utils import ModelResponse + + +class LiteLLMCompletionTransformationHandler: + + def response_api_handler( + self, + model: str, + input: Union[str, ResponseInputParam], + responses_api_request: ResponsesAPIOptionalRequestParams, + custom_llm_provider: Optional[str] = None, + _is_async: bool = False, + stream: Optional[bool] = None, + **kwargs, + ) -> Union[ + ResponsesAPIResponse, + BaseResponsesAPIStreamingIterator, + Coroutine[ + Any, Any, Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator] + ], + ]: + litellm_completion_request: dict = ( + 
LiteLLMCompletionResponsesConfig.transform_responses_api_request_to_chat_completion_request( + model=model, + input=input, + responses_api_request=responses_api_request, + custom_llm_provider=custom_llm_provider, + stream=stream, + **kwargs, + ) + ) + + if _is_async: + return self.async_response_api_handler( + litellm_completion_request=litellm_completion_request, + request_input=input, + responses_api_request=responses_api_request, + **kwargs, + ) + + litellm_completion_response: Union[ + ModelResponse, litellm.CustomStreamWrapper + ] = litellm.completion( + **litellm_completion_request, + **kwargs, + ) + + if isinstance(litellm_completion_response, ModelResponse): + responses_api_response: ResponsesAPIResponse = ( + LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response( + chat_completion_response=litellm_completion_response, + request_input=input, + responses_api_request=responses_api_request, + ) + ) + + return responses_api_response + + elif isinstance(litellm_completion_response, litellm.CustomStreamWrapper): + return LiteLLMCompletionStreamingIterator( + litellm_custom_stream_wrapper=litellm_completion_response, + request_input=input, + responses_api_request=responses_api_request, + ) + + async def async_response_api_handler( + self, + litellm_completion_request: dict, + request_input: Union[str, ResponseInputParam], + responses_api_request: ResponsesAPIOptionalRequestParams, + **kwargs, + ) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]: + litellm_completion_response: Union[ + ModelResponse, litellm.CustomStreamWrapper + ] = await litellm.acompletion( + **litellm_completion_request, + **kwargs, + ) + + if isinstance(litellm_completion_response, ModelResponse): + responses_api_response: ResponsesAPIResponse = ( + LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response( + chat_completion_response=litellm_completion_response, + request_input=request_input, + responses_api_request=responses_api_request, + ) + ) + + return responses_api_response + + elif isinstance(litellm_completion_response, litellm.CustomStreamWrapper): + return LiteLLMCompletionStreamingIterator( + litellm_custom_stream_wrapper=litellm_completion_response, + request_input=request_input, + responses_api_request=responses_api_request, + ) diff --git a/litellm/responses/litellm_completion_transformation/session_handler.py b/litellm/responses/litellm_completion_transformation/session_handler.py new file mode 100644 index 0000000000..b114611c26 --- /dev/null +++ b/litellm/responses/litellm_completion_transformation/session_handler.py @@ -0,0 +1,59 @@ +""" +Responses API has previous_response_id, which is the id of the previous response. + +LiteLLM needs to maintain a cache of the previous response input, output, previous_response_id, and model. + +This class handles that cache. 
+""" + +from typing import List, Optional, Tuple, Union + +from typing_extensions import TypedDict + +from litellm.caching import InMemoryCache +from litellm.types.llms.openai import ResponseInputParam, ResponsesAPIResponse + +RESPONSES_API_PREVIOUS_RESPONSES_CACHE = InMemoryCache() +MAX_PREV_SESSION_INPUTS = 50 + + +class ResponsesAPISessionElement(TypedDict, total=False): + input: Union[str, ResponseInputParam] + output: ResponsesAPIResponse + response_id: str + previous_response_id: Optional[str] + + +class SessionHandler: + + def add_completed_response_to_cache( + self, response_id: str, session_element: ResponsesAPISessionElement + ): + RESPONSES_API_PREVIOUS_RESPONSES_CACHE.set_cache( + key=response_id, value=session_element + ) + + def get_chain_of_previous_input_output_pairs( + self, previous_response_id: str + ) -> List[Tuple[ResponseInputParam, ResponsesAPIResponse]]: + response_api_inputs: List[Tuple[ResponseInputParam, ResponsesAPIResponse]] = [] + current_previous_response_id = previous_response_id + + count_session_elements = 0 + while current_previous_response_id: + if count_session_elements > MAX_PREV_SESSION_INPUTS: + break + session_element = RESPONSES_API_PREVIOUS_RESPONSES_CACHE.get_cache( + key=current_previous_response_id + ) + if session_element: + response_api_inputs.append( + (session_element.get("input"), session_element.get("output")) + ) + current_previous_response_id = session_element.get( + "previous_response_id" + ) + else: + break + count_session_elements += 1 + return response_api_inputs diff --git a/litellm/responses/litellm_completion_transformation/streaming_iterator.py b/litellm/responses/litellm_completion_transformation/streaming_iterator.py new file mode 100644 index 0000000000..d970746f89 --- /dev/null +++ b/litellm/responses/litellm_completion_transformation/streaming_iterator.py @@ -0,0 +1,110 @@ +from typing import List, Optional, Union + +import litellm +from litellm.main import stream_chunk_builder +from litellm.responses.litellm_completion_transformation.transformation import ( + LiteLLMCompletionResponsesConfig, +) +from litellm.responses.streaming_iterator import ResponsesAPIStreamingIterator +from litellm.types.llms.openai import ( + ResponseCompletedEvent, + ResponseInputParam, + ResponsesAPIOptionalRequestParams, + ResponsesAPIStreamEvents, + ResponsesAPIStreamingResponse, +) +from litellm.types.utils import ( + ModelResponse, + ModelResponseStream, + TextCompletionResponse, +) + + +class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator): + """ + Async iterator for processing streaming responses from the Responses API. 
+ """ + + def __init__( + self, + litellm_custom_stream_wrapper: litellm.CustomStreamWrapper, + request_input: Union[str, ResponseInputParam], + responses_api_request: ResponsesAPIOptionalRequestParams, + ): + self.litellm_custom_stream_wrapper: litellm.CustomStreamWrapper = ( + litellm_custom_stream_wrapper + ) + self.request_input: Union[str, ResponseInputParam] = request_input + self.responses_api_request: ResponsesAPIOptionalRequestParams = ( + responses_api_request + ) + self.collected_chunks: List[ModelResponseStream] = [] + self.finished: bool = False + + async def __anext__( + self, + ) -> Union[ResponsesAPIStreamingResponse, ResponseCompletedEvent]: + try: + while True: + if self.finished is True: + raise StopAsyncIteration + # Get the next chunk from the stream + try: + chunk = await self.litellm_custom_stream_wrapper.__anext__() + self.collected_chunks.append(chunk) + except StopAsyncIteration: + self.finished = True + response_completed_event = self._emit_response_completed_event() + if response_completed_event: + return response_completed_event + else: + raise StopAsyncIteration + + except Exception as e: + # Handle HTTP errors + self.finished = True + raise e + + def __iter__(self): + return self + + def __next__( + self, + ) -> Union[ResponsesAPIStreamingResponse, ResponseCompletedEvent]: + try: + while True: + if self.finished is True: + raise StopAsyncIteration + # Get the next chunk from the stream + try: + chunk = self.litellm_custom_stream_wrapper.__next__() + self.collected_chunks.append(chunk) + except StopAsyncIteration: + self.finished = True + response_completed_event = self._emit_response_completed_event() + if response_completed_event: + return response_completed_event + else: + raise StopAsyncIteration + + except Exception as e: + # Handle HTTP errors + self.finished = True + raise e + + def _emit_response_completed_event(self) -> Optional[ResponseCompletedEvent]: + litellm_model_response: Optional[ + Union[ModelResponse, TextCompletionResponse] + ] = stream_chunk_builder(chunks=self.collected_chunks) + if litellm_model_response and isinstance(litellm_model_response, ModelResponse): + + return ResponseCompletedEvent( + type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED, + response=LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response( + request_input=self.request_input, + chat_completion_response=litellm_model_response, + responses_api_request=self.responses_api_request, + ), + ) + else: + return None diff --git a/litellm/responses/litellm_completion_transformation/transformation.py b/litellm/responses/litellm_completion_transformation/transformation.py new file mode 100644 index 0000000000..b1e52eb8f3 --- /dev/null +++ b/litellm/responses/litellm_completion_transformation/transformation.py @@ -0,0 +1,631 @@ +""" +Handles transforming from Responses API -> LiteLLM completion (Chat Completion API) +""" + +from typing import Any, Dict, List, Optional, Union + +from openai.types.responses.tool_param import FunctionToolParam + +from litellm.caching import InMemoryCache +from litellm.responses.litellm_completion_transformation.session_handler import ( + ResponsesAPISessionElement, + SessionHandler, +) +from litellm.types.llms.openai import ( + AllMessageValues, + ChatCompletionResponseMessage, + ChatCompletionSystemMessage, + ChatCompletionToolCallChunk, + ChatCompletionToolCallFunctionChunk, + ChatCompletionToolMessage, + ChatCompletionToolParam, + ChatCompletionToolParamFunctionChunk, + ChatCompletionUserMessage, + 
GenericChatCompletionMessage, + Reasoning, + ResponseAPIUsage, + ResponseInputParam, + ResponsesAPIOptionalRequestParams, + ResponsesAPIResponse, + ResponseTextConfig, +) +from litellm.types.responses.main import ( + GenericResponseOutputItem, + GenericResponseOutputItemContentAnnotation, + OutputFunctionToolCall, + OutputText, +) +from litellm.types.utils import ( + ChatCompletionAnnotation, + ChatCompletionMessageToolCall, + Choices, + Function, + Message, + ModelResponse, + Usage, +) + +########### Initialize Classes used for Responses API ########### +TOOL_CALLS_CACHE = InMemoryCache() +RESPONSES_API_SESSION_HANDLER = SessionHandler() +########### End of Initialize Classes used for Responses API ########### + + +class LiteLLMCompletionResponsesConfig: + + @staticmethod + def transform_responses_api_request_to_chat_completion_request( + model: str, + input: Union[str, ResponseInputParam], + responses_api_request: ResponsesAPIOptionalRequestParams, + custom_llm_provider: Optional[str] = None, + stream: Optional[bool] = None, + **kwargs, + ) -> dict: + """ + Transform a Responses API request into a Chat Completion request + """ + litellm_completion_request: dict = { + "messages": LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages( + input=input, + responses_api_request=responses_api_request, + previous_response_id=responses_api_request.get("previous_response_id"), + ), + "model": model, + "tool_choice": responses_api_request.get("tool_choice"), + "tools": LiteLLMCompletionResponsesConfig.transform_responses_api_tools_to_chat_completion_tools( + responses_api_request.get("tools") or [] # type: ignore + ), + "top_p": responses_api_request.get("top_p"), + "user": responses_api_request.get("user"), + "temperature": responses_api_request.get("temperature"), + "parallel_tool_calls": responses_api_request.get("parallel_tool_calls"), + "max_tokens": responses_api_request.get("max_output_tokens"), + "stream": stream, + "metadata": kwargs.get("metadata"), + "service_tier": kwargs.get("service_tier"), + # litellm specific params + "custom_llm_provider": custom_llm_provider, + } + + # only pass non-None values + litellm_completion_request = { + k: v for k, v in litellm_completion_request.items() if v is not None + } + + return litellm_completion_request + + @staticmethod + def transform_responses_api_input_to_messages( + input: Union[str, ResponseInputParam], + responses_api_request: ResponsesAPIOptionalRequestParams, + previous_response_id: Optional[str] = None, + ) -> List[ + Union[ + AllMessageValues, + GenericChatCompletionMessage, + ChatCompletionMessageToolCall, + ChatCompletionResponseMessage, + ] + ]: + """ + Transform a Responses API input into a list of messages + """ + messages: List[ + Union[ + AllMessageValues, + GenericChatCompletionMessage, + ChatCompletionMessageToolCall, + ChatCompletionResponseMessage, + ] + ] = [] + if responses_api_request.get("instructions"): + messages.append( + LiteLLMCompletionResponsesConfig.transform_instructions_to_system_message( + responses_api_request.get("instructions") + ) + ) + + if previous_response_id: + previous_response_pairs = ( + RESPONSES_API_SESSION_HANDLER.get_chain_of_previous_input_output_pairs( + previous_response_id=previous_response_id + ) + ) + if previous_response_pairs: + for previous_response_pair in previous_response_pairs: + chat_completion_input_messages = LiteLLMCompletionResponsesConfig._transform_response_input_param_to_chat_completion_message( + input=previous_response_pair[0], + ) + 
chat_completion_output_messages = LiteLLMCompletionResponsesConfig._transform_responses_api_outputs_to_chat_completion_messages( + responses_api_output=previous_response_pair[1], + ) + + messages.extend(chat_completion_input_messages) + messages.extend(chat_completion_output_messages) + + messages.extend( + LiteLLMCompletionResponsesConfig._transform_response_input_param_to_chat_completion_message( + input=input, + ) + ) + + return messages + + @staticmethod + def _transform_response_input_param_to_chat_completion_message( + input: Union[str, ResponseInputParam], + ) -> List[ + Union[ + AllMessageValues, + GenericChatCompletionMessage, + ChatCompletionMessageToolCall, + ChatCompletionResponseMessage, + ] + ]: + """ + Transform a ResponseInputParam into a Chat Completion message + """ + messages: List[ + Union[ + AllMessageValues, + GenericChatCompletionMessage, + ChatCompletionMessageToolCall, + ChatCompletionResponseMessage, + ] + ] = [] + tool_call_output_messages: List[ + Union[ + AllMessageValues, + GenericChatCompletionMessage, + ChatCompletionMessageToolCall, + ChatCompletionResponseMessage, + ] + ] = [] + + if isinstance(input, str): + messages.append(ChatCompletionUserMessage(role="user", content=input)) + elif isinstance(input, list): + for _input in input: + chat_completion_messages = LiteLLMCompletionResponsesConfig._transform_responses_api_input_item_to_chat_completion_message( + input_item=_input + ) + if LiteLLMCompletionResponsesConfig._is_input_item_tool_call_output( + input_item=_input + ): + tool_call_output_messages.extend(chat_completion_messages) + else: + messages.extend(chat_completion_messages) + + messages.extend(tool_call_output_messages) + return messages + + @staticmethod + def _ensure_tool_call_output_has_corresponding_tool_call( + messages: List[Union[AllMessageValues, GenericChatCompletionMessage]], + ) -> bool: + """ + If any tool call output is present, ensure there is a corresponding tool call/tool_use block + """ + for message in messages: + if message.get("role") == "tool": + return True + return False + + @staticmethod + def _transform_responses_api_input_item_to_chat_completion_message( + input_item: Any, + ) -> List[ + Union[ + AllMessageValues, + GenericChatCompletionMessage, + ChatCompletionResponseMessage, + ] + ]: + """ + Transform a Responses API input item into a Chat Completion message + + - EasyInputMessageParam + - Message + - ResponseOutputMessageParam + - ResponseFileSearchToolCallParam + - ResponseComputerToolCallParam + - ComputerCallOutput + - ResponseFunctionWebSearchParam + - ResponseFunctionToolCallParam + - FunctionCallOutput + - ResponseReasoningItemParam + - ItemReference + """ + if LiteLLMCompletionResponsesConfig._is_input_item_tool_call_output(input_item): + # handle executed tool call results + return LiteLLMCompletionResponsesConfig._transform_responses_api_tool_call_output_to_chat_completion_message( + tool_call_output=input_item + ) + else: + return [ + GenericChatCompletionMessage( + role=input_item.get("role") or "user", + content=LiteLLMCompletionResponsesConfig._transform_responses_api_content_to_chat_completion_content( + input_item.get("content") + ), + ) + ] + + @staticmethod + def _is_input_item_tool_call_output(input_item: Any) -> bool: + """ + Check if the input item is a tool call output + """ + return input_item.get("type") in [ + "function_call_output", + "web_search_call", + "computer_call_output", + ] + + @staticmethod + def _transform_responses_api_tool_call_output_to_chat_completion_message( + 
tool_call_output: Dict[str, Any], + ) -> List[ + Union[ + AllMessageValues, + GenericChatCompletionMessage, + ChatCompletionResponseMessage, + ] + ]: + """ + ChatCompletionToolMessage is used to indicate the output from a tool call + """ + tool_output_message = ChatCompletionToolMessage( + role="tool", + content=tool_call_output.get("output") or "", + tool_call_id=tool_call_output.get("call_id") or "", + ) + + _tool_use_definition = TOOL_CALLS_CACHE.get_cache( + key=tool_call_output.get("call_id") or "", + ) + if _tool_use_definition: + """ + Append the tool use definition to the list of messages + + + Providers like Anthropic require the tool use definition to be included with the tool output + + - Input: + {'function': + arguments:'{"command": ["echo","\\n\\n Hello\\n\\n\\n

Hi
\\n\\n",">","index.html"]}', + name='shell', + 'id': 'toolu_018KFWsEySHjdKZPdUzXpymJ', + 'type': 'function' + } + - Output: + { + "id": "toolu_018KFWsEySHjdKZPdUzXpymJ", + "type": "function", + "function": { + "name": "get_weather", + "arguments": "{\"latitude\":48.8566,\"longitude\":2.3522}" + } + } + + """ + function: dict = _tool_use_definition.get("function") or {} + tool_call_chunk = ChatCompletionToolCallChunk( + id=_tool_use_definition.get("id") or "", + type=_tool_use_definition.get("type") or "function", + function=ChatCompletionToolCallFunctionChunk( + name=function.get("name") or "", + arguments=function.get("arguments") or "", + ), + index=0, + ) + chat_completion_response_message = ChatCompletionResponseMessage( + tool_calls=[tool_call_chunk], + role="assistant", + ) + return [chat_completion_response_message, tool_output_message] + + return [tool_output_message] + + @staticmethod + def _transform_responses_api_content_to_chat_completion_content( + content: Any, + ) -> Union[str, List[Union[str, Dict[str, Any]]]]: + """ + Transform a Responses API content into a Chat Completion content + """ + + if isinstance(content, str): + return content + elif isinstance(content, list): + content_list: List[Union[str, Dict[str, Any]]] = [] + for item in content: + if isinstance(item, str): + content_list.append(item) + elif isinstance(item, dict): + content_list.append( + { + "type": LiteLLMCompletionResponsesConfig._get_chat_completion_request_content_type( + item.get("type") or "text" + ), + "text": item.get("text"), + } + ) + return content_list + else: + raise ValueError(f"Invalid content type: {type(content)}") + + @staticmethod + def _get_chat_completion_request_content_type(content_type: str) -> str: + """ + Get the Chat Completion request content type + """ + # Responses API content has `input_` prefix, if it exists, remove it + if content_type.startswith("input_"): + return content_type[len("input_") :] + else: + return content_type + + @staticmethod + def transform_instructions_to_system_message( + instructions: Optional[str], + ) -> ChatCompletionSystemMessage: + """ + Transform a Instructions into a system message + """ + return ChatCompletionSystemMessage(role="system", content=instructions or "") + + @staticmethod + def transform_responses_api_tools_to_chat_completion_tools( + tools: Optional[List[FunctionToolParam]], + ) -> List[ChatCompletionToolParam]: + """ + Transform a Responses API tools into a Chat Completion tools + """ + if tools is None: + return [] + chat_completion_tools: List[ChatCompletionToolParam] = [] + for tool in tools: + chat_completion_tools.append( + ChatCompletionToolParam( + type="function", + function=ChatCompletionToolParamFunctionChunk( + name=tool["name"], + description=tool.get("description") or "", + parameters=tool.get("parameters", {}), + strict=tool.get("strict", False), + ), + ) + ) + return chat_completion_tools + + @staticmethod + def transform_chat_completion_tools_to_responses_tools( + chat_completion_response: ModelResponse, + ) -> List[OutputFunctionToolCall]: + """ + Transform a Chat Completion tools into a Responses API tools + """ + all_chat_completion_tools: List[ChatCompletionMessageToolCall] = [] + for choice in chat_completion_response.choices: + if isinstance(choice, Choices): + if choice.message.tool_calls: + all_chat_completion_tools.extend(choice.message.tool_calls) + for tool_call in choice.message.tool_calls: + TOOL_CALLS_CACHE.set_cache( + key=tool_call.id, + value=tool_call, + ) + + responses_tools: 
List[OutputFunctionToolCall] = [] + for tool in all_chat_completion_tools: + if tool.type == "function": + function_definition = tool.function + responses_tools.append( + OutputFunctionToolCall( + name=function_definition.name or "", + arguments=function_definition.get("arguments") or "", + call_id=tool.id or "", + id=tool.id or "", + type="function_call", # critical this is "function_call" to work with tools like openai codex + status=function_definition.get("status") or "completed", + ) + ) + return responses_tools + + @staticmethod + def transform_chat_completion_response_to_responses_api_response( + request_input: Union[str, ResponseInputParam], + responses_api_request: ResponsesAPIOptionalRequestParams, + chat_completion_response: ModelResponse, + ) -> ResponsesAPIResponse: + """ + Transform a Chat Completion response into a Responses API response + """ + responses_api_response: ResponsesAPIResponse = ResponsesAPIResponse( + id=chat_completion_response.id, + created_at=chat_completion_response.created, + model=chat_completion_response.model, + object=chat_completion_response.object, + error=getattr(chat_completion_response, "error", None), + incomplete_details=getattr( + chat_completion_response, "incomplete_details", None + ), + instructions=getattr(chat_completion_response, "instructions", None), + metadata=getattr(chat_completion_response, "metadata", {}), + output=LiteLLMCompletionResponsesConfig._transform_chat_completion_choices_to_responses_output( + chat_completion_response=chat_completion_response, + choices=getattr(chat_completion_response, "choices", []), + ), + parallel_tool_calls=getattr( + chat_completion_response, "parallel_tool_calls", False + ), + temperature=getattr(chat_completion_response, "temperature", 0), + tool_choice=getattr(chat_completion_response, "tool_choice", "auto"), + tools=getattr(chat_completion_response, "tools", []), + top_p=getattr(chat_completion_response, "top_p", None), + max_output_tokens=getattr( + chat_completion_response, "max_output_tokens", None + ), + previous_response_id=getattr( + chat_completion_response, "previous_response_id", None + ), + reasoning=Reasoning(), + status=getattr(chat_completion_response, "status", "completed"), + text=ResponseTextConfig(), + truncation=getattr(chat_completion_response, "truncation", None), + usage=LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage( + chat_completion_response=chat_completion_response + ), + user=getattr(chat_completion_response, "user", None), + ) + + RESPONSES_API_SESSION_HANDLER.add_completed_response_to_cache( + response_id=responses_api_response.id, + session_element=ResponsesAPISessionElement( + input=request_input, + output=responses_api_response, + response_id=responses_api_response.id, + previous_response_id=responses_api_request.get("previous_response_id"), + ), + ) + return responses_api_response + + @staticmethod + def _transform_chat_completion_choices_to_responses_output( + chat_completion_response: ModelResponse, + choices: List[Choices], + ) -> List[Union[GenericResponseOutputItem, OutputFunctionToolCall]]: + responses_output: List[ + Union[GenericResponseOutputItem, OutputFunctionToolCall] + ] = [] + for choice in choices: + responses_output.append( + GenericResponseOutputItem( + type="message", + id=chat_completion_response.id, + status=choice.finish_reason, + role=choice.message.role, + content=[ + LiteLLMCompletionResponsesConfig._transform_chat_message_to_response_output_text( + choice.message + ) + ], + ) + ) + + tool_calls = 
LiteLLMCompletionResponsesConfig.transform_chat_completion_tools_to_responses_tools( + chat_completion_response=chat_completion_response + ) + responses_output.extend(tool_calls) + return responses_output + + @staticmethod + def _transform_responses_api_outputs_to_chat_completion_messages( + responses_api_output: ResponsesAPIResponse, + ) -> List[ + Union[ + AllMessageValues, + GenericChatCompletionMessage, + ChatCompletionMessageToolCall, + ] + ]: + messages: List[ + Union[ + AllMessageValues, + GenericChatCompletionMessage, + ChatCompletionMessageToolCall, + ] + ] = [] + output_items = responses_api_output.output + for _output_item in output_items: + output_item: dict = dict(_output_item) + if output_item.get("type") == "function_call": + # handle function call output + messages.append( + LiteLLMCompletionResponsesConfig._transform_responses_output_tool_call_to_chat_completion_output_tool_call( + tool_call=output_item + ) + ) + else: + # transform as generic ResponseOutputItem + messages.append( + GenericChatCompletionMessage( + role=str(output_item.get("role")) or "user", + content=LiteLLMCompletionResponsesConfig._transform_responses_api_content_to_chat_completion_content( + output_item.get("content") + ), + ) + ) + return messages + + @staticmethod + def _transform_responses_output_tool_call_to_chat_completion_output_tool_call( + tool_call: dict, + ) -> ChatCompletionMessageToolCall: + return ChatCompletionMessageToolCall( + id=tool_call.get("id") or "", + type="function", + function=Function( + name=tool_call.get("name") or "", + arguments=tool_call.get("arguments") or "", + ), + ) + + @staticmethod + def _transform_chat_message_to_response_output_text( + message: Message, + ) -> OutputText: + return OutputText( + type="output_text", + text=message.content, + annotations=LiteLLMCompletionResponsesConfig._transform_chat_completion_annotations_to_response_output_annotations( + annotations=getattr(message, "annotations", None) + ), + ) + + @staticmethod + def _transform_chat_completion_annotations_to_response_output_annotations( + annotations: Optional[List[ChatCompletionAnnotation]], + ) -> List[GenericResponseOutputItemContentAnnotation]: + response_output_annotations: List[ + GenericResponseOutputItemContentAnnotation + ] = [] + + if annotations is None: + return response_output_annotations + + for annotation in annotations: + annotation_type = annotation.get("type") + if annotation_type == "url_citation" and "url_citation" in annotation: + url_citation = annotation["url_citation"] + response_output_annotations.append( + GenericResponseOutputItemContentAnnotation( + type=annotation_type, + start_index=url_citation.get("start_index"), + end_index=url_citation.get("end_index"), + url=url_citation.get("url"), + title=url_citation.get("title"), + ) + ) + # Handle other annotation types here + + return response_output_annotations + + @staticmethod + def _transform_chat_completion_usage_to_responses_usage( + chat_completion_response: ModelResponse, + ) -> ResponseAPIUsage: + usage: Optional[Usage] = getattr(chat_completion_response, "usage", None) + if usage is None: + return ResponseAPIUsage( + input_tokens=0, + output_tokens=0, + total_tokens=0, + ) + return ResponseAPIUsage( + input_tokens=usage.prompt_tokens, + output_tokens=usage.completion_tokens, + total_tokens=usage.total_tokens, + ) diff --git a/litellm/responses/main.py b/litellm/responses/main.py index 70b651f376..e844d86716 100644 --- a/litellm/responses/main.py +++ b/litellm/responses/main.py @@ -10,6 +10,9 @@ from 
litellm.constants import request_timeout from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler +from litellm.responses.litellm_completion_transformation.handler import ( + LiteLLMCompletionTransformationHandler, +) from litellm.responses.utils import ResponsesAPIRequestUtils from litellm.types.llms.openai import ( Reasoning, @@ -29,6 +32,7 @@ from .streaming_iterator import BaseResponsesAPIStreamingIterator ####### ENVIRONMENT VARIABLES ################### # Initialize any necessary instances or variables here base_llm_http_handler = BaseLLMHTTPHandler() +litellm_completion_transformation_handler = LiteLLMCompletionTransformationHandler() ################################################# @@ -178,19 +182,12 @@ def responses( ) # get provider config - responses_api_provider_config: Optional[ - BaseResponsesAPIConfig - ] = ProviderConfigManager.get_provider_responses_api_config( - model=model, - provider=litellm.LlmProviders(custom_llm_provider), - ) - - if responses_api_provider_config is None: - raise litellm.BadRequestError( + responses_api_provider_config: Optional[BaseResponsesAPIConfig] = ( + ProviderConfigManager.get_provider_responses_api_config( model=model, - llm_provider=custom_llm_provider, - message=f"Responses API not available for custom_llm_provider={custom_llm_provider}, model: {model}", + provider=litellm.LlmProviders(custom_llm_provider), ) + ) local_vars.update(kwargs) # Get ResponsesAPIOptionalRequestParams with only valid parameters @@ -200,6 +197,17 @@ def responses( ) ) + if responses_api_provider_config is None: + return litellm_completion_transformation_handler.response_api_handler( + model=model, + input=input, + responses_api_request=response_api_optional_params, + custom_llm_provider=custom_llm_provider, + _is_async=_is_async, + stream=stream, + **kwargs, + ) + # Get optional parameters for the responses API responses_api_request_params: Dict = ( ResponsesAPIRequestUtils.get_optional_params_responses_api( diff --git a/litellm/types/llms/base.py b/litellm/types/llms/base.py new file mode 100644 index 0000000000..aec1438c48 --- /dev/null +++ b/litellm/types/llms/base.py @@ -0,0 +1,15 @@ +from pydantic import BaseModel + + +class BaseLiteLLMOpenAIResponseObject(BaseModel): + def __getitem__(self, key): + return self.__dict__[key] + + def get(self, key, default=None): + return self.__dict__.get(key, default) + + def __contains__(self, key): + return key in self.__dict__ + + def items(self): + return self.__dict__.items() diff --git a/litellm/types/llms/openai.py b/litellm/types/llms/openai.py index 0cb05a710f..10766b65a6 100644 --- a/litellm/types/llms/openai.py +++ b/litellm/types/llms/openai.py @@ -49,9 +49,16 @@ from openai.types.responses.response_create_params import ( ToolChoice, ToolParam, ) +from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall from pydantic import BaseModel, Discriminator, Field, PrivateAttr from typing_extensions import Annotated, Dict, Required, TypedDict, override +from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject +from litellm.types.responses.main import ( + GenericResponseOutputItem, + OutputFunctionToolCall, +) + FileContent = Union[IO[bytes], bytes, PathLike] FileTypes = Union[ @@ -678,6 +685,11 @@ class ChatCompletionDeveloperMessage(OpenAIChatCompletionDeveloperMessage, total cache_control: 
ChatCompletionCachedContent +class GenericChatCompletionMessage(TypedDict, total=False): + role: Required[str] + content: Required[Union[str, List]] + + ValidUserMessageContentTypes = [ "text", "image_url", @@ -803,12 +815,12 @@ class OpenAIChatCompletionChunk(ChatCompletionChunk): class Hyperparameters(BaseModel): batch_size: Optional[Union[str, int]] = None # "Number of examples in each batch." - learning_rate_multiplier: Optional[ - Union[str, float] - ] = None # Scaling factor for the learning rate - n_epochs: Optional[ - Union[str, int] - ] = None # "The number of epochs to train the model for" + learning_rate_multiplier: Optional[Union[str, float]] = ( + None # Scaling factor for the learning rate + ) + n_epochs: Optional[Union[str, int]] = ( + None # "The number of epochs to train the model for" + ) class FineTuningJobCreate(BaseModel): @@ -835,18 +847,18 @@ class FineTuningJobCreate(BaseModel): model: str # "The name of the model to fine-tune." training_file: str # "The ID of an uploaded file that contains training data." - hyperparameters: Optional[ - Hyperparameters - ] = None # "The hyperparameters used for the fine-tuning job." - suffix: Optional[ - str - ] = None # "A string of up to 18 characters that will be added to your fine-tuned model name." - validation_file: Optional[ - str - ] = None # "The ID of an uploaded file that contains validation data." - integrations: Optional[ - List[str] - ] = None # "A list of integrations to enable for your fine-tuning job." + hyperparameters: Optional[Hyperparameters] = ( + None # "The hyperparameters used for the fine-tuning job." + ) + suffix: Optional[str] = ( + None # "A string of up to 18 characters that will be added to your fine-tuned model name." + ) + validation_file: Optional[str] = ( + None # "The ID of an uploaded file that contains validation data." + ) + integrations: Optional[List[str]] = ( + None # "A list of integrations to enable for your fine-tuning job." + ) seed: Optional[int] = None # "The seed controls the reproducibility of the job." 
@@ -887,7 +899,7 @@ class ResponsesAPIOptionalRequestParams(TypedDict, total=False): temperature: Optional[float] text: Optional[ResponseTextConfigParam] tool_choice: Optional[ToolChoice] - tools: Optional[Iterable[ToolParam]] + tools: Optional[List[ToolParam]] top_p: Optional[float] truncation: Optional[Literal["auto", "disabled"]] user: Optional[str] @@ -900,20 +912,6 @@ class ResponsesAPIRequestParams(ResponsesAPIOptionalRequestParams, total=False): model: str -class BaseLiteLLMOpenAIResponseObject(BaseModel): - def __getitem__(self, key): - return self.__dict__[key] - - def get(self, key, default=None): - return self.__dict__.get(key, default) - - def __contains__(self, key): - return key in self.__dict__ - - def items(self): - return self.__dict__.items() - - class OutputTokensDetails(BaseLiteLLMOpenAIResponseObject): reasoning_tokens: Optional[int] = None @@ -958,11 +956,14 @@ class ResponsesAPIResponse(BaseLiteLLMOpenAIResponseObject): metadata: Optional[Dict] model: Optional[str] object: Optional[str] - output: List[ResponseOutputItem] + output: Union[ + List[ResponseOutputItem], + List[Union[GenericResponseOutputItem, OutputFunctionToolCall]], + ] parallel_tool_calls: bool temperature: Optional[float] tool_choice: ToolChoice - tools: List[Tool] + tools: Union[List[Tool], List[ResponseFunctionToolCall]] top_p: Optional[float] max_output_tokens: Optional[int] previous_response_id: Optional[str] diff --git a/litellm/types/responses/main.py b/litellm/types/responses/main.py new file mode 100644 index 0000000000..63a548bbfd --- /dev/null +++ b/litellm/types/responses/main.py @@ -0,0 +1,48 @@ +from typing import Literal + +from typing_extensions import Any, List, Optional, TypedDict + +from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject + + +class GenericResponseOutputItemContentAnnotation(BaseLiteLLMOpenAIResponseObject): + """Annotation for content in a message""" + + type: Optional[str] + start_index: Optional[int] + end_index: Optional[int] + url: Optional[str] + title: Optional[str] + pass + + +class OutputText(BaseLiteLLMOpenAIResponseObject): + """Text output content from an assistant message""" + + type: Optional[str] # "output_text" + text: Optional[str] + annotations: Optional[List[GenericResponseOutputItemContentAnnotation]] + + +class OutputFunctionToolCall(BaseLiteLLMOpenAIResponseObject): + """A tool call to run a function""" + + arguments: Optional[str] + call_id: Optional[str] + name: Optional[str] + type: Optional[str] # "function_call" + id: Optional[str] + status: Literal["in_progress", "completed", "incomplete"] + + +class GenericResponseOutputItem(BaseLiteLLMOpenAIResponseObject): + """ + Generic response API output item + + """ + + type: str # "message" + id: str + status: str # "completed", "in_progress", etc. + role: str # "assistant", "user", etc. 
+ content: List[OutputText] diff --git a/tests/llm_responses_api_testing/base_responses_api.py b/tests/llm_responses_api_testing/base_responses_api.py index 356fe5e78e..884d9bda7b 100644 --- a/tests/llm_responses_api_testing/base_responses_api.py +++ b/tests/llm_responses_api_testing/base_responses_api.py @@ -68,16 +68,16 @@ def validate_responses_api_response(response, final_chunk: bool = False): "metadata": dict, "model": str, "object": str, - "temperature": (int, float), + "temperature": (int, float, type(None)), "tool_choice": (dict, str), "tools": list, - "top_p": (int, float), + "top_p": (int, float, type(None)), "max_output_tokens": (int, type(None)), "previous_response_id": (str, type(None)), "reasoning": dict, "status": str, "text": ResponseTextConfig, - "truncation": str, + "truncation": (str, type(None)), "usage": ResponseAPIUsage, "user": (str, type(None)), } diff --git a/tests/llm_responses_api_testing/test_anthropic_responses_api.py b/tests/llm_responses_api_testing/test_anthropic_responses_api.py new file mode 100644 index 0000000000..0fcb771f73 --- /dev/null +++ b/tests/llm_responses_api_testing/test_anthropic_responses_api.py @@ -0,0 +1,95 @@ +import os +import sys +import pytest +import asyncio +from typing import Optional +from unittest.mock import patch, AsyncMock + +sys.path.insert(0, os.path.abspath("../..")) +import litellm +from litellm.integrations.custom_logger import CustomLogger +import json +from litellm.types.utils import StandardLoggingPayload +from litellm.types.llms.openai import ( + ResponseCompletedEvent, + ResponsesAPIResponse, + ResponseTextConfig, + ResponseAPIUsage, + IncompleteDetails, +) +import litellm +from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler +from base_responses_api import BaseResponsesAPITest +from openai.types.responses.function_tool import FunctionTool + + +class TestAnthropicResponsesAPITest(BaseResponsesAPITest): + def get_base_completion_call_args(self): + #litellm._turn_on_debug() + return { + "model": "anthropic/claude-3-5-sonnet-latest", + } + + +def test_multiturn_tool_calls(): + # Test streaming response with tools for Anthropic + litellm._turn_on_debug() + shell_tool = dict(FunctionTool( + type="function", + name="shell", + description="Runs a shell command, and returns its output.", + parameters={ + "type": "object", + "properties": { + "command": {"type": "array", "items": {"type": "string"}}, + "workdir": {"type": "string", "description": "The working directory for the command."} + }, + "required": ["command"] + }, + strict=True + )) + + + + # Step 1: Initial request with the tool + response = litellm.responses( + input=[{ + 'role': 'user', + 'content': [ + {'type': 'input_text', 'text': 'make a hello world html file'} + ], + 'type': 'message' + }], + model='anthropic/claude-3-7-sonnet-latest', + instructions='You are a helpful coding assistant.', + tools=[shell_tool] + ) + + print("response=", response) + + # Step 2: Send the results of the tool call back to the model + # Get the response ID and tool call ID from the response + + response_id = response.id + tool_call_id = "" + for item in response.output: + if 'type' in item and item['type'] == 'function_call': + tool_call_id = item['call_id'] + break + + # Use await with asyncio.run for the async function + follow_up_response = litellm.responses( + model='anthropic/claude-3-7-sonnet-latest', + previous_response_id=response_id, + input=[{ + 'type': 'function_call_output', + 'call_id': tool_call_id, + 'output': '{"output":"\\n\\n Hello Page\\n\\n\\n

Hi
\\n
Welcome to this simple webpage!
\\n\\n > index.html\\n","metadata":{"exit_code":0,"duration_seconds":0}}' + }], + tools=[shell_tool] + ) + + print("follow_up_response=", follow_up_response) + + + \ No newline at end of file
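
For reference, a minimal usage sketch of the Responses API → Chat Completions bridge this diff introduces, assuming `ANTHROPIC_API_KEY` is exported and that the provider has no native Responses API config registered, so `litellm.responses()` falls through to `LiteLLMCompletionTransformationHandler`. The model name and prompt below are illustrative only, not part of the patch.

```python
# Illustrative sketch (assumptions noted above): exercising the new
# Responses API -> chat-completion bridge added in this diff.
import litellm

# Non-streaming: the handler converts the request via
# transform_responses_api_request_to_chat_completion_request(), calls
# litellm.completion(), and maps the ModelResponse back to a ResponsesAPIResponse.
response = litellm.responses(
    model="anthropic/claude-3-7-sonnet-latest",  # example model
    input="Write a one-sentence summary of what an LLM proxy does.",
)
print(response.id)
print(response.output)

# Streaming: the bridge wraps litellm's CustomStreamWrapper in
# LiteLLMCompletionStreamingIterator, which collects chunks and emits a single
# response.completed event once the underlying stream is exhausted.
stream = litellm.responses(
    model="anthropic/claude-3-7-sonnet-latest",
    input="Write a one-sentence summary of what an LLM proxy does.",
    stream=True,
)
for event in stream:
    if event.type == "response.completed":
        print(event.response.output)
```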