diff --git a/docs/my-website/docs/response_api.md b/docs/my-website/docs/response_api.md
index 47b409ebfa..732b1706bb 100644
--- a/docs/my-website/docs/response_api.md
+++ b/docs/my-website/docs/response_api.md
@@ -14,22 +14,22 @@ LiteLLM provides a BETA endpoint in the spec of [OpenAI's `/responses` API](http
| Fallbacks | ✅ | Works between supported models |
| Loadbalancing | ✅ | Works between supported models |
| Supported LiteLLM Versions | 1.63.8+ | |
-| Supported LLM providers | `openai` | |
+| Supported LLM providers | **All LiteLLM-supported providers** | `openai`, `anthropic`, `bedrock`, `vertex_ai`, `gemini`, `azure`, `azure_ai`, etc. |
## Usage
-## Create a model response
+### LiteLLM Python SDK
-
+
#### Non-streaming
-```python showLineNumbers
+```python showLineNumbers title="OpenAI Non-streaming Response"
import litellm
# Non-streaming response
response = litellm.responses(
- model="o1-pro",
+ model="openai/o1-pro",
input="Tell me a three sentence bedtime story about a unicorn.",
max_output_tokens=100
)
@@ -38,12 +38,12 @@ print(response)
```
#### Streaming
-```python showLineNumbers
+```python showLineNumbers title="OpenAI Streaming Response"
import litellm
# Streaming response
response = litellm.responses(
- model="o1-pro",
+ model="openai/o1-pro",
input="Tell me a three sentence bedtime story about a unicorn.",
stream=True
)
@@ -53,58 +53,169 @@ for event in response:
```
-
-First, add this to your litellm proxy config.yaml:
-```yaml showLineNumbers
-model_list:
- - model_name: o1-pro
- litellm_params:
- model: openai/o1-pro
- api_key: os.environ/OPENAI_API_KEY
-```
-
-Start your LiteLLM proxy:
-```bash
-litellm --config /path/to/config.yaml
-
-# RUNNING on http://0.0.0.0:4000
-```
-
-Then use the OpenAI SDK pointed to your proxy:
+
#### Non-streaming
-```python showLineNumbers
-from openai import OpenAI
+```python showLineNumbers title="Anthropic Non-streaming Response"
+import litellm
+import os
-# Initialize client with your proxy URL
-client = OpenAI(
- base_url="http://localhost:4000", # Your proxy URL
- api_key="your-api-key" # Your proxy API key
-)
+# Set API key
+os.environ["ANTHROPIC_API_KEY"] = "your-anthropic-api-key"
# Non-streaming response
-response = client.responses.create(
- model="o1-pro",
- input="Tell me a three sentence bedtime story about a unicorn."
+response = litellm.responses(
+ model="anthropic/claude-3-5-sonnet-20240620",
+ input="Tell me a three sentence bedtime story about a unicorn.",
+ max_output_tokens=100
)
print(response)
```
#### Streaming
-```python showLineNumbers
-from openai import OpenAI
+```python showLineNumbers title="Anthropic Streaming Response"
+import litellm
+import os
-# Initialize client with your proxy URL
-client = OpenAI(
- base_url="http://localhost:4000", # Your proxy URL
- api_key="your-api-key" # Your proxy API key
-)
+# Set API key
+os.environ["ANTHROPIC_API_KEY"] = "your-anthropic-api-key"
# Streaming response
-response = client.responses.create(
- model="o1-pro",
+response = litellm.responses(
+ model="anthropic/claude-3-5-sonnet-20240620",
+ input="Tell me a three sentence bedtime story about a unicorn.",
+ stream=True
+)
+
+for event in response:
+ print(event)
+```
+
+
+
+
+
+#### Non-streaming
+```python showLineNumbers title="Vertex AI Non-streaming Response"
+import litellm
+import os
+
+# Set credentials - Vertex AI uses application default credentials
+# Run 'gcloud auth application-default login' to authenticate
+os.environ["VERTEXAI_PROJECT"] = "your-gcp-project-id"
+os.environ["VERTEXAI_LOCATION"] = "us-central1"
+
+# Non-streaming response
+response = litellm.responses(
+ model="vertex_ai/gemini-1.5-pro",
+ input="Tell me a three sentence bedtime story about a unicorn.",
+ max_output_tokens=100
+)
+
+print(response)
+```
+
+#### Streaming
+```python showLineNumbers title="Vertex AI Streaming Response"
+import litellm
+import os
+
+# Set credentials - Vertex AI uses application default credentials
+# Run 'gcloud auth application-default login' to authenticate
+os.environ["VERTEXAI_PROJECT"] = "your-gcp-project-id"
+os.environ["VERTEXAI_LOCATION"] = "us-central1"
+
+# Streaming response
+response = litellm.responses(
+ model="vertex_ai/gemini-1.5-pro",
+ input="Tell me a three sentence bedtime story about a unicorn.",
+ stream=True
+)
+
+for event in response:
+ print(event)
+```
+
+
+
+
+
+#### Non-streaming
+```python showLineNumbers title="AWS Bedrock Non-streaming Response"
+import litellm
+import os
+
+# Set AWS credentials
+os.environ["AWS_ACCESS_KEY_ID"] = "your-access-key-id"
+os.environ["AWS_SECRET_ACCESS_KEY"] = "your-secret-access-key"
+os.environ["AWS_REGION_NAME"] = "us-west-2" # or your AWS region
+
+# Non-streaming response
+response = litellm.responses(
+ model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
+ input="Tell me a three sentence bedtime story about a unicorn.",
+ max_output_tokens=100
+)
+
+print(response)
+```
+
+#### Streaming
+```python showLineNumbers title="AWS Bedrock Streaming Response"
+import litellm
+import os
+
+# Set AWS credentials
+os.environ["AWS_ACCESS_KEY_ID"] = "your-access-key-id"
+os.environ["AWS_SECRET_ACCESS_KEY"] = "your-secret-access-key"
+os.environ["AWS_REGION_NAME"] = "us-west-2" # or your AWS region
+
+# Streaming response
+response = litellm.responses(
+ model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
+ input="Tell me a three sentence bedtime story about a unicorn.",
+ stream=True
+)
+
+for event in response:
+ print(event)
+```
+
+
+
+
+
+#### Non-streaming
+```python showLineNumbers title="Google AI Studio Non-streaming Response"
+import litellm
+import os
+
+# Set API key for Google AI Studio
+os.environ["GEMINI_API_KEY"] = "your-gemini-api-key"
+
+# Non-streaming response
+response = litellm.responses(
+ model="gemini/gemini-1.5-flash",
+ input="Tell me a three sentence bedtime story about a unicorn.",
+ max_output_tokens=100
+)
+
+print(response)
+```
+
+#### Streaming
+```python showLineNumbers title="Google AI Studio Streaming Response"
+import litellm
+import os
+
+# Set API key for Google AI Studio
+os.environ["GEMINI_API_KEY"] = "your-gemini-api-key"
+
+# Streaming response
+response = litellm.responses(
+ model="gemini/gemini-1.5-flash",
input="Tell me a three sentence bedtime story about a unicorn.",
stream=True
)
@@ -116,10 +227,298 @@ for event in response:
+### LiteLLM Proxy with OpenAI SDK
+
+First, add your model to the proxy `config.yaml` (provider-specific examples below), then start your LiteLLM proxy server.
+
+```bash title="Start LiteLLM Proxy Server"
+litellm --config /path/to/config.yaml
+
+# RUNNING on http://0.0.0.0:4000
+```
+
+
+
+
+First, add this to your litellm proxy config.yaml:
+```yaml showLineNumbers title="OpenAI Proxy Configuration"
+model_list:
+ - model_name: openai/o1-pro
+ litellm_params:
+ model: openai/o1-pro
+ api_key: os.environ/OPENAI_API_KEY
+```
+
+#### Non-streaming
+```python showLineNumbers title="OpenAI Proxy Non-streaming Response"
+from openai import OpenAI
+
+# Initialize client with your proxy URL
+client = OpenAI(
+ base_url="http://localhost:4000", # Your proxy URL
+ api_key="your-api-key" # Your proxy API key
+)
+
+# Non-streaming response
+response = client.responses.create(
+ model="openai/o1-pro",
+ input="Tell me a three sentence bedtime story about a unicorn."
+)
+
+print(response)
+```
+
+#### Streaming
+```python showLineNumbers title="OpenAI Proxy Streaming Response"
+from openai import OpenAI
+
+# Initialize client with your proxy URL
+client = OpenAI(
+ base_url="http://localhost:4000", # Your proxy URL
+ api_key="your-api-key" # Your proxy API key
+)
+
+# Streaming response
+response = client.responses.create(
+ model="openai/o1-pro",
+ input="Tell me a three sentence bedtime story about a unicorn.",
+ stream=True
+)
+
+for event in response:
+ print(event)
+```
+
+
+
+
+
+First, add this to your litellm proxy config.yaml:
+```yaml showLineNumbers title="Anthropic Proxy Configuration"
+model_list:
+ - model_name: anthropic/claude-3-5-sonnet-20240620
+ litellm_params:
+ model: anthropic/claude-3-5-sonnet-20240620
+ api_key: os.environ/ANTHROPIC_API_KEY
+```
+
+#### Non-streaming
+```python showLineNumbers title="Anthropic Proxy Non-streaming Response"
+from openai import OpenAI
+
+# Initialize client with your proxy URL
+client = OpenAI(
+ base_url="http://localhost:4000", # Your proxy URL
+ api_key="your-api-key" # Your proxy API key
+)
+
+# Non-streaming response
+response = client.responses.create(
+ model="anthropic/claude-3-5-sonnet-20240620",
+ input="Tell me a three sentence bedtime story about a unicorn."
+)
+
+print(response)
+```
+
+#### Streaming
+```python showLineNumbers title="Anthropic Proxy Streaming Response"
+from openai import OpenAI
+
+# Initialize client with your proxy URL
+client = OpenAI(
+ base_url="http://localhost:4000", # Your proxy URL
+ api_key="your-api-key" # Your proxy API key
+)
+
+# Streaming response
+response = client.responses.create(
+ model="anthropic/claude-3-5-sonnet-20240620",
+ input="Tell me a three sentence bedtime story about a unicorn.",
+ stream=True
+)
+
+for event in response:
+ print(event)
+```
+
+
+
+
+
+First, add this to your litellm proxy config.yaml:
+```yaml showLineNumbers title="Vertex AI Proxy Configuration"
+model_list:
+ - model_name: vertex_ai/gemini-1.5-pro
+ litellm_params:
+ model: vertex_ai/gemini-1.5-pro
+ vertex_project: your-gcp-project-id
+ vertex_location: us-central1
+```
+
+#### Non-streaming
+```python showLineNumbers title="Vertex AI Proxy Non-streaming Response"
+from openai import OpenAI
+
+# Initialize client with your proxy URL
+client = OpenAI(
+ base_url="http://localhost:4000", # Your proxy URL
+ api_key="your-api-key" # Your proxy API key
+)
+
+# Non-streaming response
+response = client.responses.create(
+ model="vertex_ai/gemini-1.5-pro",
+ input="Tell me a three sentence bedtime story about a unicorn."
+)
+
+print(response)
+```
+
+#### Streaming
+```python showLineNumbers title="Vertex AI Proxy Streaming Response"
+from openai import OpenAI
+
+# Initialize client with your proxy URL
+client = OpenAI(
+ base_url="http://localhost:4000", # Your proxy URL
+ api_key="your-api-key" # Your proxy API key
+)
+
+# Streaming response
+response = client.responses.create(
+ model="vertex_ai/gemini-1.5-pro",
+ input="Tell me a three sentence bedtime story about a unicorn.",
+ stream=True
+)
+
+for event in response:
+ print(event)
+```
+
+
+
+
+
+First, add this to your litellm proxy config.yaml:
+```yaml showLineNumbers title="AWS Bedrock Proxy Configuration"
+model_list:
+ - model_name: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
+ litellm_params:
+ model: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
+ aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
+ aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
+ aws_region_name: us-west-2
+```
+
+#### Non-streaming
+```python showLineNumbers title="AWS Bedrock Proxy Non-streaming Response"
+from openai import OpenAI
+
+# Initialize client with your proxy URL
+client = OpenAI(
+ base_url="http://localhost:4000", # Your proxy URL
+ api_key="your-api-key" # Your proxy API key
+)
+
+# Non-streaming response
+response = client.responses.create(
+ model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
+ input="Tell me a three sentence bedtime story about a unicorn."
+)
+
+print(response)
+```
+
+#### Streaming
+```python showLineNumbers title="AWS Bedrock Proxy Streaming Response"
+from openai import OpenAI
+
+# Initialize client with your proxy URL
+client = OpenAI(
+ base_url="http://localhost:4000", # Your proxy URL
+ api_key="your-api-key" # Your proxy API key
+)
+
+# Streaming response
+response = client.responses.create(
+ model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
+ input="Tell me a three sentence bedtime story about a unicorn.",
+ stream=True
+)
+
+for event in response:
+ print(event)
+```
+
+
+
+
+
+First, add this to your litellm proxy config.yaml:
+```yaml showLineNumbers title="Google AI Studio Proxy Configuration"
+model_list:
+ - model_name: gemini/gemini-1.5-flash
+ litellm_params:
+ model: gemini/gemini-1.5-flash
+ api_key: os.environ/GEMINI_API_KEY
+```
+
+#### Non-streaming
+```python showLineNumbers title="Google AI Studio Proxy Non-streaming Response"
+from openai import OpenAI
+
+# Initialize client with your proxy URL
+client = OpenAI(
+ base_url="http://localhost:4000", # Your proxy URL
+ api_key="your-api-key" # Your proxy API key
+)
+
+# Non-streaming response
+response = client.responses.create(
+ model="gemini/gemini-1.5-flash",
+ input="Tell me a three sentence bedtime story about a unicorn."
+)
+
+print(response)
+```
+
+#### Streaming
+```python showLineNumbers title="Google AI Studio Proxy Streaming Response"
+from openai import OpenAI
+
+# Initialize client with your proxy URL
+client = OpenAI(
+ base_url="http://localhost:4000", # Your proxy URL
+ api_key="your-api-key" # Your proxy API key
+)
+
+# Streaming response
+response = client.responses.create(
+ model="gemini/gemini-1.5-flash",
+ input="Tell me a three sentence bedtime story about a unicorn.",
+ stream=True
+)
+
+for event in response:
+ print(event)
+```
+
+
+
+
+## Supported Responses API Parameters
+
+| Provider | Supported Parameters |
+|----------|---------------------|
+| `openai` | [All Responses API parameters are supported](https://github.com/BerriAI/litellm/blob/7c3df984da8e4dff9201e4c5353fdc7a2b441831/litellm/llms/openai/responses/transformation.py#L23) |
+| `azure` | [All Responses API parameters are supported](https://github.com/BerriAI/litellm/blob/7c3df984da8e4dff9201e4c5353fdc7a2b441831/litellm/llms/openai/responses/transformation.py#L23) |
+| `anthropic` | See the adapter-supported parameters below |
+| `bedrock` | See the adapter-supported parameters below |
+| `gemini` | See the adapter-supported parameters below |
+| `vertex_ai` | See the adapter-supported parameters below |
+| `azure_ai` | See the adapter-supported parameters below |
+
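+For `anthropic`, `bedrock`, `gemini`, `vertex_ai`, and `azure_ai`, LiteLLM serves the Responses API through its Responses-to-Chat-Completion adapter, which accepts the parameter subset returned by `LiteLLMCompletionResponsesConfig.get_supported_openai_params` (defined in `litellm/responses/litellm_completion_transformation/transformation.py`). A minimal sketch for listing that subset, assuming this internal helper stays importable from that module:
+
+```python showLineNumbers title="List adapter-supported parameters"
+from litellm.responses.litellm_completion_transformation.transformation import (
+    LiteLLMCompletionResponsesConfig,
+)
+
+# Responses API params the adapter accepts for non-OpenAI providers
+supported_params = LiteLLMCompletionResponsesConfig.get_supported_openai_params(
+    model="anthropic/claude-3-5-sonnet-20240620"
+)
+print(supported_params)
+# ['input', 'model', 'instructions', 'max_output_tokens', ...]
+```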
+
-## **Supported Providers**
-| Provider | Link to Usage |
-|-------------|--------------------|
-| OpenAI| [Usage](#usage) |
-| Azure OpenAI| [Usage](../docs/providers/azure#responses-api) |
\ No newline at end of file
diff --git a/litellm/responses/litellm_completion_transformation/transformation.py b/litellm/responses/litellm_completion_transformation/transformation.py
index c00d6622bf..767778796b 100644
--- a/litellm/responses/litellm_completion_transformation/transformation.py
+++ b/litellm/responses/litellm_completion_transformation/transformation.py
@@ -53,6 +53,26 @@ RESPONSES_API_SESSION_HANDLER = SessionHandler()
class LiteLLMCompletionResponsesConfig:
+ @staticmethod
+ def get_supported_openai_params(model: str) -> list:
+ """
+        The LiteLLM adapter from the OpenAI Responses API to the Chat Completion API supports a subset of the OpenAI Responses API params.
+ """
+ return [
+ "input",
+ "model",
+ "instructions",
+ "max_output_tokens",
+ "metadata",
+ "parallel_tool_calls",
+ "previous_response_id",
+ "stream",
+ "temperature",
+ "tool_choice",
+ "tools",
+ "top_p",
+ "user",
+ ]
@staticmethod
def transform_responses_api_request_to_chat_completion_request(