Compare commits


No commits in common. "main" and "v1.67.1-nightly" have entirely different histories.

182 changed files with 748 additions and 5272 deletions

View file

@ -16,7 +16,6 @@ spec:
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
serviceAccountName: {{ include "litellm.serviceAccountName" . }}
containers:
- name: prisma-migrations
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (printf "main-%s" .Chart.AppVersion) }}"

View file

@ -3,7 +3,7 @@ import TabItem from '@theme/TabItem';
# Using Audio Models
How to send / receive audio to a `/chat/completions` endpoint
## Audio Output from a model
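As a quick illustration of what an audio-output request looks like, here is a minimal sketch (the model name and `audio` options are assumptions, not taken from this page):

```python
import base64
import litellm

# a sketch assuming an audio-capable chat model
response = litellm.completion(
    model="gpt-4o-audio-preview",                 # assumed audio-capable model
    modalities=["text", "audio"],                 # ask for audio alongside text
    audio={"voice": "alloy", "format": "wav"},    # assumed voice/format options
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)

# the audio payload comes back base64-encoded on the message
wav_bytes = base64.b64decode(response.choices[0].message.audio.data)
with open("hello.wav", "wb") as f:
    f.write(wav_bytes)
```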

View file

@ -3,7 +3,7 @@ import TabItem from '@theme/TabItem';
# Using PDF Input
How to send / receive PDFs (and other document types) to a `/chat/completions` endpoint
Works for:
- Vertex AI models (Gemini + Anthropic)

View file

@ -194,7 +194,7 @@ Expected Response
## Explicitly specify image type
If you have images without a mime-type, or if litellm is incorrectly inferring the mime type of your image (e.g. calling `gs://` URLs with Vertex AI), you can set this explicitly via the `format` param.
```python
"image_url": {

View file

@ -20,9 +20,9 @@ print(f"response: {response}")
```yaml
model_list:
- model_name: gpt-image-1 ### RECEIVED MODEL NAME ###
- model_name: dall-e-2 ### RECEIVED MODEL NAME ###
litellm_params: # all params accepted by litellm.image_generation()
model: azure/gpt-image-1 ### MODEL NAME sent to `litellm.image_generation()` ###
model: azure/dall-e-2 ### MODEL NAME sent to `litellm.image_generation()` ###
api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
api_key: "os.environ/AZURE_API_KEY_EU" # does os.getenv("AZURE_API_KEY_EU")
rpm: 6 # [OPTIONAL] Rate limit for this deployment: in requests per minute (rpm)
@ -47,7 +47,7 @@ curl -X POST 'http://0.0.0.0:4000/v1/images/generations' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "gpt-image-1",
"model": "dall-e-2",
"prompt": "A cute baby sea otter",
"n": 1,
"size": "1024x1024"
@ -104,7 +104,7 @@ Any non-openai params, will be treated as provider-specific params, and sent in
litellm_logging_obj=None,
custom_llm_provider=None,
- `model`: *string (optional)* The model to use for image generation. Defaults to openai/gpt-image-1
- `model`: *string (optional)* The model to use for image generation. Defaults to openai/dall-e-2
- `n`: *int (optional)* The number of images to generate. Must be between 1 and 10. For dall-e-3, only n=1 is supported.
@ -112,7 +112,7 @@ Any non-openai params, will be treated as provider-specific params, and sent in
- `response_format`: *string (optional)* The format in which the generated images are returned. Must be one of url or b64_json.
- `size`: *string (optional)* The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024 for gpt-image-1. Must be one of 1024x1024, 1792x1024, or 1024x1792 for dall-e-3 models.
- `size`: *string (optional)* The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024 for dall-e-2. Must be one of 1024x1024, 1792x1024, or 1024x1792 for dall-e-3 models.
- `timeout`: *integer* - The maximum time, in seconds, to wait for the API to respond. Defaults to 600 seconds (10 minutes).
@ -148,14 +148,13 @@ Any non-openai params, will be treated as provider-specific params, and sent in
from litellm import image_generation
import os
os.environ['OPENAI_API_KEY'] = ""
response = image_generation(model='gpt-image-1', prompt="cute baby otter")
response = image_generation(model='dall-e-2', prompt="cute baby otter")
```
| Model Name | Function Call | Required OS Variables |
|----------------------|---------------------------------------------|--------------------------------------|
| gpt-image-1 | `image_generation(model='gpt-image-1', prompt="cute baby otter")` | `os.environ['OPENAI_API_KEY']` |
| dall-e-3 | `image_generation(model='dall-e-3', prompt="cute baby otter")` | `os.environ['OPENAI_API_KEY']` |
| dall-e-2 | `image_generation(model='dall-e-2', prompt="cute baby otter")` | `os.environ['OPENAI_API_KEY']` |
| dall-e-3 | `image_generation(model='dall-e-3', prompt="cute baby otter")` | `os.environ['OPENAI_API_KEY']` |
## Azure OpenAI Image Generation Models
@ -183,9 +182,8 @@ print(response)
| Model Name | Function Call |
|----------------------|---------------------------------------------|
| gpt-image-1 | `image_generation(model="azure/<your deployment name>", prompt="cute baby otter")` |
| dall-e-3 | `image_generation(model="azure/<your deployment name>", prompt="cute baby otter")` |
| dall-e-2 | `image_generation(model="azure/<your deployment name>", prompt="cute baby otter")` |
| dall-e-3 | `image_generation(model="azure/<your deployment name>", prompt="cute baby otter")` |
## OpenAI Compatible Image Generation Models

View file

@ -1,83 +0,0 @@
# 🖇️ AgentOps - LLM Observability Platform
:::tip
This is community maintained. Please make an issue if you run into a bug:
https://github.com/BerriAI/litellm
:::
[AgentOps](https://docs.agentops.ai) is an observability platform that enables tracing and monitoring of LLM calls, providing detailed insights into your AI operations.
## Using AgentOps with LiteLLM
LiteLLM provides `success_callbacks` and `failure_callbacks`, allowing you to easily integrate AgentOps for comprehensive tracing and monitoring of your LLM operations.
### Integration
Use just a few lines of code to instantly trace your responses **across all providers** with AgentOps:
Get your AgentOps API Keys from https://app.agentops.ai/
```python
import litellm
# Configure LiteLLM to use AgentOps
litellm.success_callback = ["agentops"]
# Make your LLM calls as usual
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hello, how are you?"}],
)
```
Complete Code:
```python
import os
import litellm
from litellm import completion
# Set env variables
os.environ["OPENAI_API_KEY"] = "your-openai-key"
os.environ["AGENTOPS_API_KEY"] = "your-agentops-api-key"
# Configure LiteLLM to use AgentOps
litellm.success_callback = ["agentops"]
# OpenAI call
response = completion(
model="gpt-4",
messages=[{"role": "user", "content": "Hi 👋 - I'm OpenAI"}],
)
print(response)
```
### Configuration Options
The AgentOps integration can be configured through environment variables:
- `AGENTOPS_API_KEY` (str, optional): Your AgentOps API key
- `AGENTOPS_ENVIRONMENT` (str, optional): Deployment environment (defaults to "production")
- `AGENTOPS_SERVICE_NAME` (str, optional): Service name for tracing (defaults to "agentops")
### Advanced Usage
You can configure additional settings through environment variables:
```python
import os
import litellm
# Configure AgentOps settings
os.environ["AGENTOPS_API_KEY"] = "your-agentops-api-key"
os.environ["AGENTOPS_ENVIRONMENT"] = "staging"
os.environ["AGENTOPS_SERVICE_NAME"] = "my-service"
# Enable AgentOps tracing
litellm.success_callback = ["agentops"]
```
### Support
For issues or questions, please refer to:
- [AgentOps Documentation](https://docs.agentops.ai)
- [LiteLLM Documentation](https://docs.litellm.ai)

View file

@ -53,7 +53,7 @@ response = completion(
## Additional information in metadata
You can send any additional information to Greenscale by using the `metadata` field in completion and the `greenscale_` prefix. This can be useful for sending metadata about the request, such as the project and application name, customer_id, environment, or any other information you want to use to track usage. `greenscale_project` and `greenscale_application` are required fields.
```python
#openai call with additional metadata
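# a minimal sketch (values are illustrative): pass `greenscale_`-prefixed keys via `metadata`
from litellm import completion

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello!"}],
    metadata={
        "greenscale_project": "acme-project",          # required
        "greenscale_application": "acme-application",  # required
        "greenscale_customer_id": "customer-123",      # optional: anything else you want to track
    },
)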

View file

@ -185,7 +185,7 @@ curl --location --request POST 'http://0.0.0.0:4000/chat/completions' \
* `trace_release` - Release for the trace, defaults to `None`
* `trace_metadata` - Metadata for the trace, defaults to `None`
* `trace_user_id` - User identifier for the trace, defaults to completion argument `user`
* `tags` - Tags for the trace, defaults to `None`
##### Updatable Parameters on Continuation

View file

@ -222,7 +222,7 @@ curl http://localhost:4000/vertex-ai/v1/projects/${PROJECT_ID}/locations/us-cent
LiteLLM Proxy Server supports two methods of authentication to Vertex AI:
1. Pass Vertex Credentials client side to proxy server
2. Set Vertex AI credentials on proxy server

View file

@ -1095,7 +1095,7 @@ response = completion(
print(response.choices[0])
```
</TabItem>
<TabItem value="proxy" label="PROXY">
<TabItem value="proxy" lable="PROXY">
1. Add model to config

View file

@ -483,7 +483,7 @@ response.stream_to_file(speech_file_path)
This is a walkthrough on how to use Azure Active Directory Tokens - Microsoft Entra ID to make `litellm.completion()` calls
Step 1 - Download Azure CLI
Installation instructions: https://learn.microsoft.com/en-us/cli/azure/install-azure-cli
```shell
brew update && brew install azure-cli
```

View file

@ -655,7 +655,7 @@ import os
os.environ["GEMINI_API_KEY"] = ".."
tools = [{"googleSearch": {}}] # 👈 ADD GOOGLE SEARCH
tools = [{"googleSearchRetrieval": {}}] # 👈 ADD GOOGLE SEARCH
response = completion(
model="gemini/gemini-2.0-flash",
@ -691,7 +691,7 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-d '{
"model": "gemini-2.0-flash",
"messages": [{"role": "user", "content": "What is the weather in San Francisco?"}],
"tools": [{"googleSearch": {}}]
"tools": [{"googleSearchRetrieval": {}}]
}
'
```

View file

@ -364,7 +364,7 @@ from litellm import completion
## SETUP ENVIRONMENT
# !gcloud auth application-default login - run this to add vertex credentials to your env
tools = [{"googleSearch": {}}] # 👈 ADD GOOGLE SEARCH
tools = [{"googleSearchRetrieval": {}}] # 👈 ADD GOOGLE SEARCH
resp = litellm.completion(
model="vertex_ai/gemini-1.0-pro-001",
@ -391,7 +391,7 @@ client = OpenAI(
response = client.chat.completions.create(
model="gemini-pro",
messages=[{"role": "user", "content": "Who won the world cup?"}],
tools=[{"googleSearch": {}}],
tools=[{"googleSearchRetrieval": {}}],
)
print(response)
@ -410,7 +410,7 @@ curl http://localhost:4000/v1/chat/completions \
],
"tools": [
{
"googleSearch": {}
"googleSearchRetrieval": {}
}
]
}'
@ -529,7 +529,7 @@ from litellm import completion
# !gcloud auth application-default login - run this to add vertex credentials to your env
tools = [{"googleSearch": {"disable_attributon": False}}] # 👈 ADD GOOGLE SEARCH
tools = [{"googleSearchRetrieval": {"disable_attributon": False}}] # 👈 ADD GOOGLE SEARCH
resp = litellm.completion(
model="vertex_ai/gemini-1.0-pro-001",
@ -692,7 +692,7 @@ curl http://0.0.0.0:4000/v1/chat/completions \
### **Context Caching**
Vertex AI context caching is supported by calling the provider API directly. (Unified Endpoint support coming soon.)
[**Go straight to provider**](../pass_through/vertex_ai.md#context-caching)
@ -910,7 +910,7 @@ export VERTEXAI_PROJECT="my-test-project" # ONLY use if model project is differe
## Specifying Safety Settings
In certain use-cases you may need to make calls to the models and pass [safety settings](https://ai.google.dev/docs/safety_setting_gemini) different from the defaults. To do so, simply pass the `safety_settings` argument to `completion` or `acompletion`. For example:
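A minimal sketch of what that looks like (the model name and category values below are illustrative):

```python
from litellm import completion

response = completion(
    model="vertex_ai/gemini-1.5-pro",  # illustrative model name
    messages=[{"role": "user", "content": "hi"}],
    safety_settings=[
        {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
    ],
)
```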
### Set per model/request
@ -2050,7 +2050,7 @@ response = completion(
print(response.choices[0])
```
</TabItem>
<TabItem value="proxy" label="PROXY">
<TabItem value="proxy" lable="PROXY">
1. Add model to config

View file

@ -243,12 +243,12 @@ We allow you to pass a local image or an http/https url of your image
Set `UI_LOGO_PATH` on your env. We recommend using a hosted image; it's a lot easier to set up and configure / debug
Example setting a hosted image
```shell
UI_LOGO_PATH="https://litellm-logo-aws-marketplace.s3.us-west-2.amazonaws.com/berriai-logo-github.png"
```
Example setting a local image (on your container)
```shell
UI_LOGO_PATH="ui_images/logo.jpg"
```

View file

@ -213,7 +213,7 @@ model_list:
general_settings:
master_key: sk-1234
alerting: ["slack"]
alerting_threshold: 0.0001 # (Seconds) set an artificially low threshold for testing alerting
alert_to_webhook_url: {
"llm_exceptions": "https://hooks.slack.com/services/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",
"llm_too_slow": "https://hooks.slack.com/services/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",
@ -247,7 +247,7 @@ model_list:
general_settings:
master_key: sk-1234
alerting: ["slack"]
alerting_threshold: 0.0001 # (Seconds) set an artificially low threshold for testing alerting
alert_to_webhook_url: {
"llm_exceptions": ["os.environ/SLACK_WEBHOOK_URL", "os.environ/SLACK_WEBHOOK_URL_2"],
"llm_too_slow": ["https://webhook.site/7843a980-a494-4967-80fb-d502dbc16886", "https://webhook.site/28cfb179-f4fb-4408-8129-729ff55cf213"],
@ -425,7 +425,7 @@ curl -X GET --location 'http://0.0.0.0:4000/health/services?service=webhook' \
- `projected_exceeded_date` *str or null*: The date when the budget is projected to be exceeded, returned when 'soft_budget' is set for key (optional).
- `projected_spend` *float or null*: The projected spend amount, returned when 'soft_budget' is set for key (optional).
- `event` *Literal["budget_crossed", "threshold_crossed", "projected_limit_exceeded"]*: The type of event that triggered the webhook. Possible values are:
* "spend_tracked": Emitted whenever spend is tracked for a customer id.
* "spend_tracked": Emitted whenver spend is tracked for a customer id.
* "budget_crossed": Indicates that the spend has exceeded the max budget.
* "threshold_crossed": Indicates that spend has crossed a threshold (currently sent when 85% and 95% of budget is reached).
* "projected_limit_exceeded": For "key" only - Indicates that the projected spend is expected to exceed the soft budget threshold.
@ -480,7 +480,7 @@ LLM-related Alerts
| `cooldown_deployment` | Alerts when a deployment is put into cooldown | ✅ |
| `new_model_added` | Notifications when a new model is added to litellm proxy through /model/new| ✅ |
| `outage_alerts` | Alerts when a specific LLM deployment is facing an outage | ✅ |
| `region_outage_alerts` | Alerts when a specific LLM region is facing an outage. Example: us-east-1 | ✅ |
Budget and Spend Alerts

View file

@ -299,9 +299,6 @@ router_settings:
|------|-------------|
| ACTIONS_ID_TOKEN_REQUEST_TOKEN | Token for requesting ID in GitHub Actions
| ACTIONS_ID_TOKEN_REQUEST_URL | URL for requesting ID token in GitHub Actions
| AGENTOPS_ENVIRONMENT | Environment for AgentOps logging integration
| AGENTOPS_API_KEY | API Key for AgentOps logging integration
| AGENTOPS_SERVICE_NAME | Service Name for AgentOps logging integration
| AISPEND_ACCOUNT_ID | Account ID for AI Spend
| AISPEND_API_KEY | API Key for AI Spend
| ALLOWED_EMAIL_DOMAINS | List of email domains allowed for access

View file

@ -56,7 +56,7 @@ model_list:
model: azure/<your_deployment_name>
api_key: os.environ/AZURE_API_KEY
api_base: os.environ/AZURE_API_BASE
api_version: os.environ/AZURE_API_VERSION
model_info:
input_cost_per_token: 0.000421 # 👈 ONLY to track cost per token
output_cost_per_token: 0.000520 # 👈 ONLY to track cost per token

View file

@ -19,7 +19,7 @@ LiteLLM writes `UPDATE` and `UPSERT` queries to the DB. When using 10+ instances
### Stage 1. Each instance writes updates to redis
Each instance will accumulate the spend updates for a key, user, team, etc. and write the updates to a redis queue.
<Image img={require('../../img/deadlock_fix_1.png')} style={{ width: '900px', height: 'auto' }} />
<p style={{textAlign: 'left', color: '#666'}}>

View file

@ -22,7 +22,7 @@ echo 'LITELLM_MASTER_KEY="sk-1234"' > .env
# Add the litellm salt key - you cannot change this after adding a model
# It is used to encrypt / decrypt your LLM API Key credentials
# We recommend - https://1password.com/password-generator/
# password generator to get a random hash for litellm salt key
echo 'LITELLM_SALT_KEY="sk-1234"' >> .env
@ -125,7 +125,7 @@ CMD ["--port", "4000", "--config", "config.yaml", "--detailed_debug"]
### Build from litellm `pip` package
Follow these instructions to build a docker container from the litellm pip package. If your company has a strict requirement around security / building images, you can follow these steps.
Dockerfile
@ -999,7 +999,7 @@ services:
- "4000:4000" # Map the container port to the host, change the host port if necessary
volumes:
- ./litellm-config.yaml:/app/config.yaml # Mount the local configuration file
# You can change the port or number of workers as per your requirements or pass any new supported CLI argument. Make sure the port passed here matches the container port defined above in `ports` value
command: [ "--config", "/app/config.yaml", "--port", "4000", "--num_workers", "8" ]
# ...rest of your docker-compose config if any

View file

@ -691,7 +691,7 @@ curl --request POST \
<TabItem value="admin_only_routes" label="Test `admin_only_routes`">
**Successful Request**
```shell
curl --location 'http://0.0.0.0:4000/key/generate' \
@ -729,7 +729,7 @@ curl --location 'http://0.0.0.0:4000/key/generate' \
<TabItem value="allowed_routes" label="Test `allowed_routes`">
**Successful Request**
```shell
curl http://localhost:4000/chat/completions \

View file

@ -164,7 +164,7 @@ curl -i http://localhost:4000/v1/chat/completions \
**Expected response**
Your response headers will include `x-litellm-applied-guardrails` with the guardrail applied
```
x-litellm-applied-guardrails: aporia-pre-guard

View file

@ -277,7 +277,7 @@ Found under `kwargs["standard_logging_object"]`. This is a standard payload, log
## Langfuse
We will use the `--config` to set `litellm.success_callback = ["langfuse"]`. This will log all successful LLM calls to Langfuse. Make sure to set `LANGFUSE_PUBLIC_KEY` and `LANGFUSE_SECRET_KEY` in your environment.
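For reference, the SDK-level equivalent of that config is just the following (a sketch; key values are placeholders):

```python
import os
import litellm

os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."  # placeholder
os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."  # placeholder

litellm.success_callback = ["langfuse"]  # log successful LLM calls to Langfuse
```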
**Step 1** Install langfuse
@ -535,8 +535,8 @@ print(response)
Use this if you want to control which LiteLLM-specific fields are logged as tags by the LiteLLM proxy. By default LiteLLM Proxy logs no LiteLLM-specific fields
| LiteLLM specific field | Description | Example Value |
|---------------------------|-----------------------------------------------------------------------------------------|------------------------------------------------|
| `cache_hit` | Indicates whether a cache hit occurred (True) or not (False) | `true`, `false` |
| `cache_key` | The Cache key used for this request | `d2b758c****`|
| `proxy_base_url` | The base URL for the proxy server, the value of env var `PROXY_BASE_URL` on your server | `https://proxy.example.com`|
| `user_api_key_alias` | An alias for the LiteLLM Virtual Key.| `prod-app1` |
@ -1190,7 +1190,7 @@ We will use the `--config` to set
- `litellm.success_callback = ["s3"]`
This will log all successful LLM calls to the s3 bucket
**Step 1** Set AWS Credentials in .env
@ -1279,7 +1279,7 @@ Log LLM Logs to [Azure Data Lake Storage](https://learn.microsoft.com/en-us/azur
| Property | Details |
|----------|---------|
| Description | Log LLM Input/Output to Azure Blob Storage (Bucket) |
| Azure Docs on Data Lake Storage | [Azure Data Lake Storage](https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction) |
@ -1360,7 +1360,7 @@ LiteLLM Supports logging to the following Datdog Integrations:
<Tabs>
<TabItem value="datadog" label="Datadog Logs">
We will use the `--config` to set `litellm.callbacks = ["datadog"]`. This will log all successful LLM calls to DataDog.
**Step 1**: Create a `config.yaml` file and set `litellm_settings`: `success_callback`
@ -1636,7 +1636,7 @@ class MyCustomHandler(CustomLogger):
litellm_params = kwargs.get("litellm_params", {})
metadata = litellm_params.get("metadata", {}) # headers passed to LiteLLM proxy, can be found here
# Access Exceptions & Traceback
exception_event = kwargs.get("exception", None)
traceback_event = kwargs.get("traceback_exception", None)
@ -2205,7 +2205,7 @@ We will use the `--config` to set
- `litellm.success_callback = ["dynamodb"]`
- `litellm.dynamodb_table_name = "your-table-name"`
This will log all successful LLM calls to DynamoDB
**Step 1** Set AWS Credentials in .env
@ -2370,7 +2370,7 @@ litellm --test
[Athina](https://athina.ai/) allows you to log LLM Input/Output for monitoring, analytics, and observability.
We will use the `--config` to set `litellm.success_callback = ["athina"]`. This will log all successful LLM calls to Athina.
**Step 1** Set Athina API key

View file

@ -61,7 +61,7 @@ CMD ["--port", "4000", "--config", "./proxy_server_config.yaml"]
## 3. Use Redis 'port','host', 'password'. NOT 'redis_url'
If you decide to use Redis, DO NOT use 'redis_url'. We recommend using redis port, host, and password params.
`redis_url` is 80 RPS slower
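At the Router/SDK level, this corresponds to passing the individual connection params instead of a URL. A minimal sketch (env var names and the deployment entry are assumptions):

```python
import os
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-4o",  # illustrative deployment
            "litellm_params": {"model": "openai/gpt-4o", "api_key": os.environ["OPENAI_API_KEY"]},
        }
    ],
    redis_host=os.environ["REDIS_HOST"],          # 👈 host / port / password, not redis_url
    redis_port=int(os.environ["REDIS_PORT"]),
    redis_password=os.environ["REDIS_PASSWORD"],
)
```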
@ -169,7 +169,7 @@ If you plan on using the DB, set a salt key for encrypting/decrypting variables
Do not change this after adding a model. It is used to encrypt / decrypt your LLM API Key credentials
We recommend the https://1password.com/password-generator/ password generator to get a random hash for the litellm salt key.
```bash
export LITELLM_SALT_KEY="sk-1234"

View file

@ -3,7 +3,7 @@
Set temporary budget increase for a LiteLLM Virtual Key. Use this if you get asked to increase the budget for a key temporarily.
| Hierarchy | Supported |
|-----------|-----------|
| LiteLLM Virtual Key | ✅ |
| User | ❌ |

View file

@ -4,7 +4,7 @@ import TabItem from '@theme/TabItem';
# Adding LLM Credentials
You can add LLM provider credentials on the UI. Once you add credentials, you can reuse them when adding new models.
## Add a credential + model

View file

@ -23,7 +23,7 @@ Requirements:
- **Set on config.yaml** set your master key under `general_settings:master_key`, example below
- **Set env variable** set `LITELLM_MASTER_KEY`
(the proxy Dockerfile checks if the `DATABASE_URL` is set and then initializes the DB connection)
```shell
export DATABASE_URL=postgresql://<user>:<password>@<host>:<port>/<dbname>
@ -333,7 +333,7 @@ curl http://localhost:4000/v1/chat/completions \
**Expected Response**
Expect to see a successful response from the litellm proxy since the key passed in `X-Litellm-Key` is valid
```shell
{"id":"chatcmpl-f9b2b79a7c30477ab93cd0e717d1773e","choices":[{"finish_reason":"stop","index":0,"message":{"content":"\n\nHello there, how may I assist you today?","role":"assistant","tool_calls":null,"function_call":null}}],"created":1677652288,"model":"gpt-3.5-turbo-0125","object":"chat.completion","system_fingerprint":"fp_44709d6fcb","usage":{"completion_tokens":12,"prompt_tokens":9,"total_tokens":21}
```

View file

@ -631,3 +631,10 @@ follow_up = client.responses.create(
</TabItem>
</Tabs>
#### How It Works
1. When a user makes an initial request to the Responses API, LiteLLM caches which model deployment returned that specific response. (Stored in Redis if you connected LiteLLM to Redis)
2. When a subsequent request includes `previous_response_id`, LiteLLM automatically routes it to the same deployment
3. If the original deployment is unavailable, or if the `previous_response_id` isn't found in the cache, LiteLLM falls back to normal routing
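For illustration, a minimal sketch of this flow against the proxy (base URL, key, and model name are placeholders):

```python
from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")  # LiteLLM proxy

# initial request - LiteLLM records which deployment served this response
initial = client.responses.create(model="gpt-4o", input="Draft a haiku about the sea.")

# follow-up request - `previous_response_id` pins routing to the same deployment
follow_up = client.responses.create(
    model="gpt-4o",
    input="Now translate it to French.",
    previous_response_id=initial.id,
)
print(follow_up.output_text)
```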

View file

@ -994,16 +994,16 @@ litellm --health
## Logging Proxy Input/Output - OpenTelemetry
### Step 1 Start OpenTelemetry Collector Docker Container
This container sends logs to your selected destination
#### Install OpenTelemetry Collector Docker Image
```shell
docker pull otel/opentelemetry-collector:0.90.0
docker run -p 127.0.0.1:4317:4317 -p 127.0.0.1:55679:55679 otel/opentelemetry-collector:0.90.0
```
#### Set Destination paths on OpenTelemetry Collector
Here's the OpenTelemetry yaml config to use with Elastic Search
```yaml
@ -1077,7 +1077,7 @@ general_settings:
LiteLLM will read the `OTEL_ENDPOINT` environment variable to send data to your OTEL collector
```python
os.environ['OTEL_ENDPOINT'] # defaults to 127.0.0.1:4317 if not provided
```
#### Start LiteLLM Proxy
@ -1101,8 +1101,8 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
```
#### Test & View Logs on OpenTelemetry Collector
On successful logging you should be able to see this log on your `OpenTelemetry Collector` Docker Container
```shell
Events:
SpanEvent #0
@ -1149,7 +1149,7 @@ Here's the log view on Elastic Search. You can see the request `input`, `output`
<Image img={require('../img/elastic_otel.png')} />
## Logging Proxy Input/Output - Langfuse
We will use the `--config` to set `litellm.success_callback = ["langfuse"]`. This will log all successful LLM calls to Langfuse.
**Step 1** Install langfuse

View file

@ -117,7 +117,7 @@ response = completion("command-nightly", messages)
"""
# questions/logs you want to run the LLM on
questions = [
"what is litellm?",
"why should I use LiteLLM",

View file

@ -30,7 +30,7 @@ def inference(message, history):
yield partial_message
except Exception as e:
print("Exception encountered:", str(e))
yield f"An Error occurred please 'Clear' the error and try your question again"
yield f"An Error occured please 'Clear' the error and try your question again"
```
### Define Chat Interface

View file

@ -411,7 +411,6 @@ const sidebars = {
type: "category",
label: "Logging & Observability",
items: [
"observability/agentops_integration",
"observability/langfuse_integration",
"observability/lunary_integration",
"observability/mlflow",

View file

@ -113,7 +113,6 @@ _custom_logger_compatible_callbacks_literal = Literal[
"pagerduty",
"humanloop",
"gcs_pubsub",
"agentops",
"anthropic_cache_control_hook",
]
logged_real_time_event_types: Optional[Union[List[str], Literal["*"]]] = None

View file

@ -57,7 +57,6 @@ from litellm.llms.vertex_ai.image_generation.cost_calculator import (
from litellm.responses.utils import ResponseAPILoggingUtils
from litellm.types.llms.openai import (
HttpxBinaryResponseContent,
ImageGenerationRequestQuality,
OpenAIRealtimeStreamList,
OpenAIRealtimeStreamResponseBaseObject,
OpenAIRealtimeStreamSessionEvents,
@ -643,9 +642,9 @@ def completion_cost( # noqa: PLR0915
or isinstance(completion_response, dict)
): # tts returns a custom class
if isinstance(completion_response, dict):
usage_obj: Optional[Union[dict, Usage]] = (
completion_response.get("usage", {})
)
usage_obj: Optional[
Union[dict, Usage]
] = completion_response.get("usage", {})
else:
usage_obj = getattr(completion_response, "usage", {})
if isinstance(usage_obj, BaseModel) and not _is_known_usage_objects(
@ -914,7 +913,7 @@ def completion_cost( # noqa: PLR0915
def get_response_cost_from_hidden_params(
hidden_params: Union[dict, BaseModel],
hidden_params: Union[dict, BaseModel]
) -> Optional[float]:
if isinstance(hidden_params, BaseModel):
_hidden_params_dict = hidden_params.model_dump()
@ -1102,36 +1101,29 @@ def default_image_cost_calculator(
f"{quality}/{base_model_name}" if quality else base_model_name
)
# gpt-image-1 models use low, medium, high quality. If the user did not specify quality, use medium for the gpt-image-1 model family
model_name_with_v2_quality = (
f"{ImageGenerationRequestQuality.MEDIUM.value}/{base_model_name}"
)
verbose_logger.debug(
f"Looking up cost for models: {model_name_with_quality}, {base_model_name}"
)
# Try model with quality first, fall back to base model name
if model_name_with_quality in litellm.model_cost:
cost_info = litellm.model_cost[model_name_with_quality]
elif base_model_name in litellm.model_cost:
cost_info = litellm.model_cost[base_model_name]
else:
# Try without provider prefix
model_without_provider = f"{size_str}/{model.split('/')[-1]}"
model_with_quality_without_provider = (
f"{quality}/{model_without_provider}" if quality else model_without_provider
)
# Try model with quality first, fall back to base model name
cost_info: Optional[dict] = None
models_to_check = [
model_name_with_quality,
base_model_name,
model_name_with_v2_quality,
model_with_quality_without_provider,
model_without_provider,
]
for model in models_to_check:
if model in litellm.model_cost:
cost_info = litellm.model_cost[model]
break
if cost_info is None:
if model_with_quality_without_provider in litellm.model_cost:
cost_info = litellm.model_cost[model_with_quality_without_provider]
elif model_without_provider in litellm.model_cost:
cost_info = litellm.model_cost[model_without_provider]
else:
raise Exception(
f"Model not found in cost map. Tried checking {models_to_check}"
f"Model not found in cost map. Tried {model_name_with_quality}, {base_model_name}, {model_with_quality_without_provider}, and {model_without_provider}"
)
return cost_info["input_cost_per_pixel"] * height * width * n

View file

@ -45,14 +45,6 @@ class SpanAttributes:
"""
The name of the model being used.
"""
LLM_PROVIDER = "llm.provider"
"""
The provider of the model, such as OpenAI, Azure, Google, etc.
"""
LLM_SYSTEM = "llm.system"
"""
The AI product as identified by the client or server
"""
LLM_PROMPTS = "llm.prompts"
"""
Prompts provided to a completions API.
@ -73,40 +65,15 @@ class SpanAttributes:
"""
Number of tokens in the prompt.
"""
LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE = "llm.token_count.prompt_details.cache_write"
"""
Number of tokens in the prompt that were written to cache.
"""
LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ = "llm.token_count.prompt_details.cache_read"
"""
Number of tokens in the prompt that were read from cache.
"""
LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO = "llm.token_count.prompt_details.audio"
"""
The number of audio input tokens presented in the prompt
"""
LLM_TOKEN_COUNT_COMPLETION = "llm.token_count.completion"
"""
Number of tokens in the completion.
"""
LLM_TOKEN_COUNT_COMPLETION_DETAILS_REASONING = "llm.token_count.completion_details.reasoning"
"""
Number of tokens used for reasoning steps in the completion.
"""
LLM_TOKEN_COUNT_COMPLETION_DETAILS_AUDIO = "llm.token_count.completion_details.audio"
"""
The number of audio input tokens generated by the model
"""
LLM_TOKEN_COUNT_TOTAL = "llm.token_count.total"
"""
Total number of tokens, including both prompt and completion.
"""
LLM_TOOLS = "llm.tools"
"""
List of tools that are advertised to the LLM to be able to call
"""
TOOL_NAME = "tool.name"
"""
Name of the tool being used.
@ -145,19 +112,6 @@ class SpanAttributes:
The id of the user
"""
PROMPT_VENDOR = "prompt.vendor"
"""
The vendor or origin of the prompt, e.g. a prompt library, a specialized service, etc.
"""
PROMPT_ID = "prompt.id"
"""
A vendor-specific id used to locate the prompt.
"""
PROMPT_URL = "prompt.url"
"""
A vendor-specific url used to locate the prompt.
"""
class MessageAttributes:
"""
@ -197,10 +151,6 @@ class MessageAttributes:
The JSON string representing the arguments passed to the function
during a function call.
"""
MESSAGE_TOOL_CALL_ID = "message.tool_call_id"
"""
The id of the tool call.
"""
class MessageContentAttributes:
@ -236,25 +186,6 @@ class ImageAttributes:
"""
class AudioAttributes:
"""
Attributes for audio
"""
AUDIO_URL = "audio.url"
"""
The url to an audio file
"""
AUDIO_MIME_TYPE = "audio.mime_type"
"""
The mime type of the audio file
"""
AUDIO_TRANSCRIPT = "audio.transcript"
"""
The transcript of the audio file
"""
class DocumentAttributes:
"""
Attributes for a document.
@ -326,10 +257,6 @@ class ToolCallAttributes:
Attributes for a tool call
"""
TOOL_CALL_ID = "tool_call.id"
"""
The id of the tool call.
"""
TOOL_CALL_FUNCTION_NAME = "tool_call.function.name"
"""
The name of function that is being called during a tool call.
@ -341,18 +268,6 @@ class ToolCallAttributes:
"""
class ToolAttributes:
"""
Attributes for a tools
"""
TOOL_JSON_SCHEMA = "tool.json_schema"
"""
The json schema of a tool input, It is RECOMMENDED that this be in the
OpenAI tool calling format: https://platform.openai.com/docs/assistants/tools
"""
class OpenInferenceSpanKindValues(Enum):
TOOL = "TOOL"
CHAIN = "CHAIN"
@ -369,21 +284,3 @@ class OpenInferenceSpanKindValues(Enum):
class OpenInferenceMimeTypeValues(Enum):
TEXT = "text/plain"
JSON = "application/json"
class OpenInferenceLLMSystemValues(Enum):
OPENAI = "openai"
ANTHROPIC = "anthropic"
COHERE = "cohere"
MISTRALAI = "mistralai"
VERTEXAI = "vertexai"
class OpenInferenceLLMProviderValues(Enum):
OPENAI = "openai"
ANTHROPIC = "anthropic"
COHERE = "cohere"
MISTRALAI = "mistralai"
GOOGLE = "google"
AZURE = "azure"
AWS = "aws"

View file

@ -1,3 +0,0 @@
from .agentops import AgentOps
__all__ = ["AgentOps"]

View file

@ -1,118 +0,0 @@
"""
AgentOps integration for LiteLLM - Provides OpenTelemetry tracing for LLM calls
"""
import os
from dataclasses import dataclass
from typing import Optional, Dict, Any
from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig
from litellm.llms.custom_httpx.http_handler import _get_httpx_client
@dataclass
class AgentOpsConfig:
endpoint: str = "https://otlp.agentops.cloud/v1/traces"
api_key: Optional[str] = None
service_name: Optional[str] = None
deployment_environment: Optional[str] = None
auth_endpoint: str = "https://api.agentops.ai/v3/auth/token"
@classmethod
def from_env(cls):
return cls(
endpoint="https://otlp.agentops.cloud/v1/traces",
api_key=os.getenv("AGENTOPS_API_KEY"),
service_name=os.getenv("AGENTOPS_SERVICE_NAME", "agentops"),
deployment_environment=os.getenv("AGENTOPS_ENVIRONMENT", "production"),
auth_endpoint="https://api.agentops.ai/v3/auth/token"
)
class AgentOps(OpenTelemetry):
"""
AgentOps integration - built on top of OpenTelemetry
Example usage:
```python
import litellm
litellm.success_callback = ["agentops"]
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hello, how are you?"}],
)
```
"""
def __init__(
self,
config: Optional[AgentOpsConfig] = None,
):
if config is None:
config = AgentOpsConfig.from_env()
# Prefetch JWT token for authentication
jwt_token = None
project_id = None
if config.api_key:
try:
response = self._fetch_auth_token(config.api_key, config.auth_endpoint)
jwt_token = response.get("token")
project_id = response.get("project_id")
except Exception:
pass
headers = f"Authorization=Bearer {jwt_token}" if jwt_token else None
otel_config = OpenTelemetryConfig(
exporter="otlp_http",
endpoint=config.endpoint,
headers=headers
)
# Initialize OpenTelemetry with our config
super().__init__(
config=otel_config,
callback_name="agentops"
)
# Set AgentOps-specific resource attributes
resource_attrs = {
"service.name": config.service_name or "litellm",
"deployment.environment": config.deployment_environment or "production",
"telemetry.sdk.name": "agentops",
}
if project_id:
resource_attrs["project.id"] = project_id
self.resource_attributes = resource_attrs
def _fetch_auth_token(self, api_key: str, auth_endpoint: str) -> Dict[str, Any]:
"""
Fetch JWT authentication token from AgentOps API
Args:
api_key: AgentOps API key
auth_endpoint: Authentication endpoint
Returns:
Dict containing JWT token and project ID
"""
headers = {
"Content-Type": "application/json",
"Connection": "keep-alive",
}
client = _get_httpx_client()
try:
response = client.post(
url=auth_endpoint,
headers=headers,
json={"api_key": api_key},
timeout=10
)
if response.status_code != 200:
raise Exception(f"Failed to fetch auth token: {response.text}")
return response.json()
finally:
client.close()

View file

@ -1,4 +1,3 @@
import json
from typing import TYPE_CHECKING, Any, Optional, Union
from litellm._logging import verbose_logger
@ -13,141 +12,36 @@ else:
Span = Any
def cast_as_primitive_value_type(value) -> Union[str, bool, int, float]:
"""
Converts a value to an OTEL-supported primitive for Arize/Phoenix observability.
"""
if value is None:
return ""
if isinstance(value, (str, bool, int, float)):
return value
try:
return str(value)
except Exception:
return ""
def safe_set_attribute(span: Span, key: str, value: Any):
"""
Sets a span attribute safely with OTEL-compliant primitive typing for Arize/Phoenix.
"""
primitive_value = cast_as_primitive_value_type(value)
span.set_attribute(key, primitive_value)
def set_attributes(span: Span, kwargs, response_obj): # noqa: PLR0915
"""
Populates span with OpenInference-compliant LLM attributes for Arize and Phoenix tracing.
"""
def set_attributes(span: Span, kwargs, response_obj):
from litellm.integrations._types.open_inference import (
MessageAttributes,
OpenInferenceSpanKindValues,
SpanAttributes,
ToolCallAttributes,
)
try:
optional_params = kwargs.get("optional_params", {})
litellm_params = kwargs.get("litellm_params", {})
standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get(
"standard_logging_object"
)
if standard_logging_payload is None:
raise ValueError("standard_logging_object not found in kwargs")
#############################################
############ LLM CALL METADATA ##############
#############################################
# Set custom metadata for observability and trace enrichment.
metadata = (
standard_logging_payload.get("metadata")
if standard_logging_payload
else None
)
if metadata is not None:
safe_set_attribute(span, SpanAttributes.METADATA, safe_dumps(metadata))
if standard_logging_payload and (
metadata := standard_logging_payload["metadata"]
):
span.set_attribute(SpanAttributes.METADATA, safe_dumps(metadata))
#############################################
########## LLM Request Attributes ###########
#############################################
# The name of the LLM a request is being made to.
# The name of the LLM a request is being made to
if kwargs.get("model"):
safe_set_attribute(
span,
SpanAttributes.LLM_MODEL_NAME,
kwargs.get("model"),
)
span.set_attribute(SpanAttributes.LLM_MODEL_NAME, kwargs.get("model"))
# The LLM request type.
safe_set_attribute(
span,
"llm.request.type",
standard_logging_payload["call_type"],
)
# The Generative AI Provider: Azure, OpenAI, etc.
safe_set_attribute(
span,
SpanAttributes.LLM_PROVIDER,
litellm_params.get("custom_llm_provider", "Unknown"),
)
# The maximum number of tokens the LLM generates for a request.
if optional_params.get("max_tokens"):
safe_set_attribute(
span,
"llm.request.max_tokens",
optional_params.get("max_tokens"),
)
# The temperature setting for the LLM request.
if optional_params.get("temperature"):
safe_set_attribute(
span,
"llm.request.temperature",
optional_params.get("temperature"),
)
# The top_p sampling setting for the LLM request.
if optional_params.get("top_p"):
safe_set_attribute(
span,
"llm.request.top_p",
optional_params.get("top_p"),
)
# Indicates whether response is streamed.
safe_set_attribute(
span,
"llm.is_streaming",
str(optional_params.get("stream", False)),
)
# Logs the user ID if present.
if optional_params.get("user"):
safe_set_attribute(
span,
"llm.user",
optional_params.get("user"),
)
# The unique identifier for the completion.
if response_obj and response_obj.get("id"):
safe_set_attribute(span, "llm.response.id", response_obj.get("id"))
# The model used to generate the response.
if response_obj and response_obj.get("model"):
safe_set_attribute(
span,
"llm.response.model",
response_obj.get("model"),
)
# Required by OpenInference to mark span as LLM kind.
safe_set_attribute(
span,
span.set_attribute(
SpanAttributes.OPENINFERENCE_SPAN_KIND,
OpenInferenceSpanKindValues.LLM.value,
)
@ -156,132 +50,77 @@ def set_attributes(span: Span, kwargs, response_obj): # noqa: PLR0915
# for /chat/completions
# https://docs.arize.com/arize/large-language-models/tracing/semantic-conventions
if messages:
last_message = messages[-1]
safe_set_attribute(
span,
span.set_attribute(
SpanAttributes.INPUT_VALUE,
last_message.get("content", ""),
messages[-1].get("content", ""), # get the last message for input
)
# LLM_INPUT_MESSAGES shows up under `input_messages` tab on the span page.
# LLM_INPUT_MESSAGES shows up under `input_messages` tab on the span page
for idx, msg in enumerate(messages):
prefix = f"{SpanAttributes.LLM_INPUT_MESSAGES}.{idx}"
# Set the role per message.
safe_set_attribute(
span, f"{prefix}.{MessageAttributes.MESSAGE_ROLE}", msg.get("role")
# Set the role per message
span.set_attribute(
f"{SpanAttributes.LLM_INPUT_MESSAGES}.{idx}.{MessageAttributes.MESSAGE_ROLE}",
msg["role"],
)
# Set the content per message.
safe_set_attribute(
span,
f"{prefix}.{MessageAttributes.MESSAGE_CONTENT}",
# Set the content per message
span.set_attribute(
f"{SpanAttributes.LLM_INPUT_MESSAGES}.{idx}.{MessageAttributes.MESSAGE_CONTENT}",
msg.get("content", ""),
)
# Capture tools (function definitions) used in the LLM call.
tools = optional_params.get("tools")
if tools:
for idx, tool in enumerate(tools):
function = tool.get("function")
if not function:
continue
prefix = f"{SpanAttributes.LLM_TOOLS}.{idx}"
safe_set_attribute(
span, f"{prefix}.{SpanAttributes.TOOL_NAME}", function.get("name")
)
safe_set_attribute(
span,
f"{prefix}.{SpanAttributes.TOOL_DESCRIPTION}",
function.get("description"),
)
safe_set_attribute(
span,
f"{prefix}.{SpanAttributes.TOOL_PARAMETERS}",
json.dumps(function.get("parameters")),
)
# Capture tool calls made during function-calling LLM flows.
functions = optional_params.get("functions")
if functions:
for idx, function in enumerate(functions):
prefix = f"{MessageAttributes.MESSAGE_TOOL_CALLS}.{idx}"
safe_set_attribute(
span,
f"{prefix}.{ToolCallAttributes.TOOL_CALL_FUNCTION_NAME}",
function.get("name"),
)
# Capture invocation parameters and user ID if available.
model_params = (
standard_logging_payload.get("model_parameters")
if standard_logging_payload
else None
)
if model_params:
if standard_logging_payload and (
model_params := standard_logging_payload["model_parameters"]
):
# The Generative AI Provider: Azure, OpenAI, etc.
safe_set_attribute(
span,
SpanAttributes.LLM_INVOCATION_PARAMETERS,
safe_dumps(model_params),
span.set_attribute(
SpanAttributes.LLM_INVOCATION_PARAMETERS, safe_dumps(model_params)
)
if model_params.get("user"):
user_id = model_params.get("user")
if user_id is not None:
safe_set_attribute(span, SpanAttributes.USER_ID, user_id)
span.set_attribute(SpanAttributes.USER_ID, user_id)
#############################################
########## LLM Response Attributes ##########
# https://docs.arize.com/arize/large-language-models/tracing/semantic-conventions
#############################################
# Captures response tokens, message, and content.
if hasattr(response_obj, "get"):
for idx, choice in enumerate(response_obj.get("choices", [])):
for choice in response_obj.get("choices", []):
response_message = choice.get("message", {})
safe_set_attribute(
span,
SpanAttributes.OUTPUT_VALUE,
response_message.get("content", ""),
span.set_attribute(
SpanAttributes.OUTPUT_VALUE, response_message.get("content", "")
)
# This shows up under `output_messages` tab on the span page.
prefix = f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.{idx}"
safe_set_attribute(
span,
f"{prefix}.{MessageAttributes.MESSAGE_ROLE}",
# This shows up under `output_messages` tab on the span page
# This code assumes a single response
span.set_attribute(
f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.0.{MessageAttributes.MESSAGE_ROLE}",
response_message.get("role"),
)
safe_set_attribute(
span,
f"{prefix}.{MessageAttributes.MESSAGE_CONTENT}",
span.set_attribute(
f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.0.{MessageAttributes.MESSAGE_CONTENT}",
response_message.get("content", ""),
)
# Token usage info.
usage = response_obj and response_obj.get("usage")
usage = response_obj.get("usage")
if usage:
safe_set_attribute(
span,
span.set_attribute(
SpanAttributes.LLM_TOKEN_COUNT_TOTAL,
usage.get("total_tokens"),
)
# The number of tokens used in the LLM response (completion).
safe_set_attribute(
span,
span.set_attribute(
SpanAttributes.LLM_TOKEN_COUNT_COMPLETION,
usage.get("completion_tokens"),
)
# The number of tokens used in the LLM prompt.
safe_set_attribute(
span,
span.set_attribute(
SpanAttributes.LLM_TOKEN_COUNT_PROMPT,
usage.get("prompt_tokens"),
)
pass
except Exception as e:
verbose_logger.error(
f"[Arize/Phoenix] Failed to set OpenInference span attributes: {e}"
)
if hasattr(span, "record_exception"):
span.record_exception(e)
verbose_logger.error(f"Error setting arize attributes: {e}")

View file

@ -13,15 +13,10 @@ import uuid
from datetime import datetime
from typing import Any, Dict, List, Optional, Union
import httpx
import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.integrations.datadog.datadog import DataDogLogger
from litellm.litellm_core_utils.prompt_templates.common_utils import (
handle_any_messages_to_chat_completion_str_messages_conversion,
)
from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
httpxSpecialProvider,
@ -111,6 +106,7 @@ class DataDogLLMObsLogger(DataDogLogger, CustomBatchLogger):
},
)
response.raise_for_status()
if response.status_code != 202:
raise Exception(
f"DataDogLLMObs: Unexpected response - status_code: {response.status_code}, text: {response.text}"
@ -120,10 +116,6 @@ class DataDogLLMObsLogger(DataDogLogger, CustomBatchLogger):
f"DataDogLLMObs: Successfully sent batch - status_code: {response.status_code}"
)
self.log_queue.clear()
except httpx.HTTPStatusError as e:
verbose_logger.exception(
f"DataDogLLMObs: Error sending batch - {e.response.text}"
)
except Exception as e:
verbose_logger.exception(f"DataDogLLMObs: Error sending batch - {str(e)}")
@ -141,11 +133,7 @@ class DataDogLLMObsLogger(DataDogLogger, CustomBatchLogger):
metadata = kwargs.get("litellm_params", {}).get("metadata", {})
input_meta = InputMeta(
messages=handle_any_messages_to_chat_completion_str_messages_conversion(
messages
)
)
input_meta = InputMeta(messages=messages) # type: ignore
output_meta = OutputMeta(messages=self._get_response_messages(response_obj))
meta = Meta(

View file

@ -311,9 +311,6 @@ def exception_type( # type: ignore # noqa: PLR0915
elif (
"invalid_request_error" in error_str
and "content_policy_violation" in error_str
) or (
"Invalid prompt" in error_str
and "violating our usage policy" in error_str
):
exception_mapping_worked = True
raise ContentPolicyViolationError(

View file

@ -28,7 +28,6 @@ from litellm._logging import _is_debugging_on, verbose_logger
from litellm.batches.batch_utils import _handle_completed_batch
from litellm.caching.caching import DualCache, InMemoryCache
from litellm.caching.caching_handler import LLMCachingHandler
from litellm.constants import (
DEFAULT_MOCK_RESPONSE_COMPLETION_TOKEN_COUNT,
DEFAULT_MOCK_RESPONSE_PROMPT_TOKEN_COUNT,
@ -37,7 +36,6 @@ from litellm.cost_calculator import (
RealtimeAPITokenUsageProcessor,
_select_model_name_for_cost_calc,
)
from litellm.integrations.agentops import AgentOps
from litellm.integrations.anthropic_cache_control_hook import AnthropicCacheControlHook
from litellm.integrations.arize.arize import ArizeLogger
from litellm.integrations.custom_guardrail import CustomGuardrail
@ -2687,15 +2685,7 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
"""
try:
custom_logger_init_args = custom_logger_init_args or {}
if logging_integration == "agentops": # Add AgentOps initialization
for callback in _in_memory_loggers:
if isinstance(callback, AgentOps):
return callback # type: ignore
agentops_logger = AgentOps()
_in_memory_loggers.append(agentops_logger)
return agentops_logger # type: ignore
elif logging_integration == "lago":
if logging_integration == "lago":
for callback in _in_memory_loggers:
if isinstance(callback, LagoLogger):
return callback # type: ignore

View file

@ -6,7 +6,7 @@ import io
import mimetypes
import re
from os import PathLike
from typing import Any, Dict, List, Literal, Mapping, Optional, Union, cast
from typing import Dict, List, Literal, Mapping, Optional, Union, cast
from litellm.types.llms.openai import (
AllMessageValues,
@ -32,35 +32,6 @@ DEFAULT_ASSISTANT_CONTINUE_MESSAGE = ChatCompletionAssistantMessage(
)
def handle_any_messages_to_chat_completion_str_messages_conversion(
messages: Any,
) -> List[Dict[str, str]]:
"""
Handles any messages to chat completion str messages conversion
Relevant Issue: https://github.com/BerriAI/litellm/issues/9494
"""
import json
if isinstance(messages, list):
try:
return cast(
List[Dict[str, str]],
handle_messages_with_content_list_to_str_conversion(messages),
)
except Exception:
return [{"input": json.dumps(message, default=str)} for message in messages]
elif isinstance(messages, dict):
try:
return [{"input": json.dumps(messages, default=str)}]
except Exception:
return [{"input": str(messages)}]
elif isinstance(messages, str):
return [{"input": messages}]
else:
return [{"input": str(messages)}]
def handle_messages_with_content_list_to_str_conversion(
messages: List[AllMessageValues],
) -> List[AllMessageValues]:

View file

@ -1,14 +1,11 @@
from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, cast
from typing import TYPE_CHECKING, Any, Optional, cast
import httpx
import litellm
from litellm._logging import verbose_logger
from litellm.llms.openai.responses.transformation import OpenAIResponsesAPIConfig
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import *
from litellm.types.responses.main import *
from litellm.types.router import GenericLiteLLMParams
from litellm.utils import _add_path_to_api_base
if TYPE_CHECKING:
@ -44,7 +41,11 @@ class AzureOpenAIResponsesAPIConfig(OpenAIResponsesAPIConfig):
def get_complete_url(
self,
api_base: Optional[str],
api_key: Optional[str],
model: str,
optional_params: dict,
litellm_params: dict,
stream: Optional[bool] = None,
) -> str:
"""
Constructs a complete URL for the API request.
@ -91,82 +92,3 @@ class AzureOpenAIResponsesAPIConfig(OpenAIResponsesAPIConfig):
final_url = httpx.URL(new_url).copy_with(params=query_params)
return str(final_url)
#########################################################
########## DELETE RESPONSE API TRANSFORMATION ##############
#########################################################
def _construct_url_for_response_id_in_path(
self, api_base: str, response_id: str
) -> str:
"""
Constructs a URL for the API request with the response_id in the path.
"""
from urllib.parse import urlparse, urlunparse
# Parse the URL to separate its components
parsed_url = urlparse(api_base)
# Insert the response_id at the end of the path component
# Remove trailing slash if present to avoid double slashes
path = parsed_url.path.rstrip("/")
new_path = f"{path}/{response_id}"
# Reconstruct the URL with all original components but with the modified path
constructed_url = urlunparse(
(
parsed_url.scheme, # http, https
parsed_url.netloc, # domain name, port
new_path, # path with response_id added
parsed_url.params, # parameters
parsed_url.query, # query string
parsed_url.fragment, # fragment
)
)
return constructed_url
def transform_delete_response_api_request(
self,
response_id: str,
api_base: str,
litellm_params: GenericLiteLLMParams,
headers: dict,
) -> Tuple[str, Dict]:
"""
Transform the delete response API request into a URL and data
Azure OpenAI API expects the following request:
- DELETE /openai/responses/{response_id}?api-version=xxx
This function handles URLs with query parameters by inserting the response_id
at the correct location (before any query parameters).
"""
delete_url = self._construct_url_for_response_id_in_path(
api_base=api_base, response_id=response_id
)
data: Dict = {}
verbose_logger.debug(f"delete response url={delete_url}")
return delete_url, data
#########################################################
########## GET RESPONSE API TRANSFORMATION ###############
#########################################################
def transform_get_response_api_request(
self,
response_id: str,
api_base: str,
litellm_params: GenericLiteLLMParams,
headers: dict,
) -> Tuple[str, Dict]:
"""
Transform the get response API request into a URL and data
OpenAI API expects the following request
- GET /v1/responses/{response_id}
"""
get_url = self._construct_url_for_response_id_in_path(
api_base=api_base, response_id=response_id
)
data: Dict = {}
verbose_logger.debug(f"get response url={get_url}")
return get_url, data

View file

@ -1,6 +1,6 @@
import types
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
from typing import TYPE_CHECKING, Any, Dict, Optional, Union
import httpx
@ -10,7 +10,6 @@ from litellm.types.llms.openai import (
ResponsesAPIResponse,
ResponsesAPIStreamingResponse,
)
from litellm.types.responses.main import *
from litellm.types.router import GenericLiteLLMParams
if TYPE_CHECKING:
@ -74,7 +73,11 @@ class BaseResponsesAPIConfig(ABC):
def get_complete_url(
self,
api_base: Optional[str],
api_key: Optional[str],
model: str,
optional_params: dict,
litellm_params: dict,
stream: Optional[bool] = None,
) -> str:
"""
OPTIONAL
@ -119,56 +122,6 @@ class BaseResponsesAPIConfig(ABC):
"""
pass
#########################################################
########## DELETE RESPONSE API TRANSFORMATION ##############
#########################################################
@abstractmethod
def transform_delete_response_api_request(
self,
response_id: str,
api_base: str,
litellm_params: GenericLiteLLMParams,
headers: dict,
) -> Tuple[str, Dict]:
pass
@abstractmethod
def transform_delete_response_api_response(
self,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
) -> DeleteResponseResult:
pass
#########################################################
########## END DELETE RESPONSE API TRANSFORMATION #######
#########################################################
#########################################################
########## GET RESPONSE API TRANSFORMATION ###############
#########################################################
@abstractmethod
def transform_get_response_api_request(
self,
response_id: str,
api_base: str,
litellm_params: GenericLiteLLMParams,
headers: dict,
) -> Tuple[str, Dict]:
pass
@abstractmethod
def transform_get_response_api_response(
self,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
) -> ResponsesAPIResponse:
pass
#########################################################
########## END GET RESPONSE API TRANSFORMATION ##########
#########################################################
def get_error_class(
self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
) -> BaseLLMException:

View file

@ -107,15 +107,6 @@ class AmazonConverseConfig(BaseConfig):
"response_format",
]
if (
"arn" in model
): # we can't infer the model from the arn, so just add all params
supported_params.append("tools")
supported_params.append("tool_choice")
supported_params.append("thinking")
supported_params.append("reasoning_effort")
return supported_params
## Filter out 'cross-region' from model name
base_model = BedrockModelInfo.get_base_model(model)
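The hunk above means that for ARN-referenced Bedrock models the base model (and so its real capability set) cannot be inferred, so tool- and reasoning-related params are advertised unconditionally. A hedged check of that behaviour; the import path and the fabricated ARN below are assumptions, not taken from this diff:

```python
# Hedged sketch; assumes AmazonConverseConfig lives at this path in the installed litellm.
from litellm.llms.bedrock.chat.converse_transformation import AmazonConverseConfig

params = AmazonConverseConfig().get_supported_openai_params(
    model="arn:aws:bedrock:us-east-1:123456789012:imported-model/example"  # fabricated ARN
)
# With the change above, ARN-based models should advertise these as well:
assert {"tools", "tool_choice", "thinking", "reasoning_effort"} <= set(params)
```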

View file

@ -650,49 +650,6 @@ class HTTPHandler:
except Exception as e:
raise e
def delete(
self,
url: str,
data: Optional[Union[dict, str]] = None, # type: ignore
json: Optional[dict] = None,
params: Optional[dict] = None,
headers: Optional[dict] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
stream: bool = False,
):
try:
if timeout is not None:
req = self.client.build_request(
"DELETE", url, data=data, json=json, params=params, headers=headers, timeout=timeout # type: ignore
)
else:
req = self.client.build_request(
"DELETE", url, data=data, json=json, params=params, headers=headers # type: ignore
)
response = self.client.send(req, stream=stream)
response.raise_for_status()
return response
except httpx.TimeoutException:
raise litellm.Timeout(
message=f"Connection timed out after {timeout} seconds.",
model="default-model-name",
llm_provider="litellm-httpx-handler",
)
except httpx.HTTPStatusError as e:
if stream is True:
setattr(e, "message", mask_sensitive_info(e.response.read()))
setattr(e, "text", mask_sensitive_info(e.response.read()))
else:
error_text = mask_sensitive_info(e.response.text)
setattr(e, "message", error_text)
setattr(e, "text", error_text)
setattr(e, "status_code", e.response.status_code)
raise e
except Exception as e:
raise e
def __del__(self) -> None:
try:
self.close()

View file

@ -36,7 +36,6 @@ from litellm.types.llms.openai import (
ResponsesAPIResponse,
)
from litellm.types.rerank import OptionalRerankParams, RerankResponse
from litellm.types.responses.main import DeleteResponseResult
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import EmbeddingResponse, FileTypes, TranscriptionResponse
from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager
@ -1016,7 +1015,6 @@ class BaseLLMHTTPHandler:
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
_is_async: bool = False,
fake_stream: bool = False,
litellm_metadata: Optional[Dict[str, Any]] = None,
) -> Union[
ResponsesAPIResponse,
BaseResponsesAPIStreamingIterator,
@ -1043,7 +1041,6 @@ class BaseLLMHTTPHandler:
timeout=timeout,
client=client if isinstance(client, AsyncHTTPHandler) else None,
fake_stream=fake_stream,
litellm_metadata=litellm_metadata,
)
if client is None or not isinstance(client, HTTPHandler):
@ -1067,7 +1064,11 @@ class BaseLLMHTTPHandler:
api_base = responses_api_provider_config.get_complete_url(
api_base=litellm_params.api_base,
api_key=litellm_params.api_key,
model=model,
optional_params=response_api_optional_request_params,
litellm_params=dict(litellm_params),
stream=stream,
)
data = responses_api_provider_config.transform_responses_api_request(
@ -1112,8 +1113,6 @@ class BaseLLMHTTPHandler:
model=model,
logging_obj=logging_obj,
responses_api_provider_config=responses_api_provider_config,
litellm_metadata=litellm_metadata,
custom_llm_provider=custom_llm_provider,
)
return SyncResponsesAPIStreamingIterator(
@ -1121,8 +1120,6 @@ class BaseLLMHTTPHandler:
model=model,
logging_obj=logging_obj,
responses_api_provider_config=responses_api_provider_config,
litellm_metadata=litellm_metadata,
custom_llm_provider=custom_llm_provider,
)
else:
# For non-streaming requests
@ -1159,7 +1156,6 @@ class BaseLLMHTTPHandler:
timeout: Optional[Union[float, httpx.Timeout]] = None,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
fake_stream: bool = False,
litellm_metadata: Optional[Dict[str, Any]] = None,
) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
"""
Async version of the responses API handler.
@ -1187,7 +1183,11 @@ class BaseLLMHTTPHandler:
api_base = responses_api_provider_config.get_complete_url(
api_base=litellm_params.api_base,
api_key=litellm_params.api_key,
model=model,
optional_params=response_api_optional_request_params,
litellm_params=dict(litellm_params),
stream=stream,
)
data = responses_api_provider_config.transform_responses_api_request(
@ -1234,8 +1234,6 @@ class BaseLLMHTTPHandler:
model=model,
logging_obj=logging_obj,
responses_api_provider_config=responses_api_provider_config,
litellm_metadata=litellm_metadata,
custom_llm_provider=custom_llm_provider,
)
# Return the streaming iterator
@ -1244,8 +1242,6 @@ class BaseLLMHTTPHandler:
model=model,
logging_obj=logging_obj,
responses_api_provider_config=responses_api_provider_config,
litellm_metadata=litellm_metadata,
custom_llm_provider=custom_llm_provider,
)
else:
# For non-streaming, proceed as before
@ -1269,319 +1265,6 @@ class BaseLLMHTTPHandler:
logging_obj=logging_obj,
)
async def async_delete_response_api_handler(
self,
response_id: str,
responses_api_provider_config: BaseResponsesAPIConfig,
litellm_params: GenericLiteLLMParams,
logging_obj: LiteLLMLoggingObj,
custom_llm_provider: Optional[str],
extra_headers: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
_is_async: bool = False,
) -> DeleteResponseResult:
"""
Async version of the delete response API handler.
Uses async HTTP client to make requests.
"""
if client is None or not isinstance(client, AsyncHTTPHandler):
async_httpx_client = get_async_httpx_client(
llm_provider=litellm.LlmProviders(custom_llm_provider),
params={"ssl_verify": litellm_params.get("ssl_verify", None)},
)
else:
async_httpx_client = client
headers = responses_api_provider_config.validate_environment(
api_key=litellm_params.api_key,
headers=extra_headers or {},
model="None",
)
if extra_headers:
headers.update(extra_headers)
api_base = responses_api_provider_config.get_complete_url(
api_base=litellm_params.api_base,
litellm_params=dict(litellm_params),
)
url, data = responses_api_provider_config.transform_delete_response_api_request(
response_id=response_id,
api_base=api_base,
litellm_params=litellm_params,
headers=headers,
)
## LOGGING
logging_obj.pre_call(
input="",
api_key="",
additional_args={
"complete_input_dict": data,
"api_base": api_base,
"headers": headers,
},
)
try:
response = await async_httpx_client.delete(
url=url, headers=headers, data=json.dumps(data), timeout=timeout
)
except Exception as e:
raise self._handle_error(
e=e,
provider_config=responses_api_provider_config,
)
return responses_api_provider_config.transform_delete_response_api_response(
raw_response=response,
logging_obj=logging_obj,
)
def delete_response_api_handler(
self,
response_id: str,
responses_api_provider_config: BaseResponsesAPIConfig,
litellm_params: GenericLiteLLMParams,
logging_obj: LiteLLMLoggingObj,
custom_llm_provider: Optional[str],
extra_headers: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
_is_async: bool = False,
) -> Union[DeleteResponseResult, Coroutine[Any, Any, DeleteResponseResult]]:
"""
Sync entry point for the delete response API handler.
Dispatches to the async delete handler when `_is_async` is True; otherwise uses a sync HTTP client.
"""
if _is_async:
return self.async_delete_response_api_handler(
response_id=response_id,
responses_api_provider_config=responses_api_provider_config,
litellm_params=litellm_params,
logging_obj=logging_obj,
custom_llm_provider=custom_llm_provider,
extra_headers=extra_headers,
extra_body=extra_body,
timeout=timeout,
client=client,
)
if client is None or not isinstance(client, HTTPHandler):
sync_httpx_client = _get_httpx_client(
params={"ssl_verify": litellm_params.get("ssl_verify", None)}
)
else:
sync_httpx_client = client
headers = responses_api_provider_config.validate_environment(
api_key=litellm_params.api_key,
headers=extra_headers or {},
model="None",
)
if extra_headers:
headers.update(extra_headers)
api_base = responses_api_provider_config.get_complete_url(
api_base=litellm_params.api_base,
litellm_params=dict(litellm_params),
)
url, data = responses_api_provider_config.transform_delete_response_api_request(
response_id=response_id,
api_base=api_base,
litellm_params=litellm_params,
headers=headers,
)
## LOGGING
logging_obj.pre_call(
input="",
api_key="",
additional_args={
"complete_input_dict": data,
"api_base": api_base,
"headers": headers,
},
)
try:
response = sync_httpx_client.delete(
url=url, headers=headers, data=json.dumps(data), timeout=timeout
)
except Exception as e:
raise self._handle_error(
e=e,
provider_config=responses_api_provider_config,
)
return responses_api_provider_config.transform_delete_response_api_response(
raw_response=response,
logging_obj=logging_obj,
)
def get_responses(
self,
response_id: str,
responses_api_provider_config: BaseResponsesAPIConfig,
litellm_params: GenericLiteLLMParams,
logging_obj: LiteLLMLoggingObj,
custom_llm_provider: Optional[str] = None,
extra_headers: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
_is_async: bool = False,
) -> Union[ResponsesAPIResponse, Coroutine[Any, Any, ResponsesAPIResponse]]:
"""
Get a response by ID
Uses GET /v1/responses/{response_id} endpoint in the responses API
"""
if _is_async:
return self.async_get_responses(
response_id=response_id,
responses_api_provider_config=responses_api_provider_config,
litellm_params=litellm_params,
logging_obj=logging_obj,
custom_llm_provider=custom_llm_provider,
extra_headers=extra_headers,
extra_body=extra_body,
timeout=timeout,
client=client,
)
if client is None or not isinstance(client, HTTPHandler):
sync_httpx_client = _get_httpx_client(
params={"ssl_verify": litellm_params.get("ssl_verify", None)}
)
else:
sync_httpx_client = client
headers = responses_api_provider_config.validate_environment(
api_key=litellm_params.api_key,
headers=extra_headers or {},
model="None",
)
if extra_headers:
headers.update(extra_headers)
api_base = responses_api_provider_config.get_complete_url(
api_base=litellm_params.api_base,
litellm_params=dict(litellm_params),
)
url, data = responses_api_provider_config.transform_get_response_api_request(
response_id=response_id,
api_base=api_base,
litellm_params=litellm_params,
headers=headers,
)
## LOGGING
logging_obj.pre_call(
input="",
api_key="",
additional_args={
"complete_input_dict": data,
"api_base": api_base,
"headers": headers,
},
)
try:
response = sync_httpx_client.get(
url=url, headers=headers, params=data
)
except Exception as e:
raise self._handle_error(
e=e,
provider_config=responses_api_provider_config,
)
return responses_api_provider_config.transform_get_response_api_response(
raw_response=response,
logging_obj=logging_obj,
)
async def async_get_responses(
self,
response_id: str,
responses_api_provider_config: BaseResponsesAPIConfig,
litellm_params: GenericLiteLLMParams,
logging_obj: LiteLLMLoggingObj,
custom_llm_provider: Optional[str] = None,
extra_headers: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> ResponsesAPIResponse:
"""
Async version of get_responses
"""
if client is None or not isinstance(client, AsyncHTTPHandler):
async_httpx_client = get_async_httpx_client(
llm_provider=litellm.LlmProviders(custom_llm_provider),
params={"ssl_verify": litellm_params.get("ssl_verify", None)},
)
else:
async_httpx_client = client
headers = responses_api_provider_config.validate_environment(
api_key=litellm_params.api_key,
headers=extra_headers or {},
model="None",
)
if extra_headers:
headers.update(extra_headers)
api_base = responses_api_provider_config.get_complete_url(
api_base=litellm_params.api_base,
litellm_params=dict(litellm_params),
)
url, data = responses_api_provider_config.transform_get_response_api_request(
response_id=response_id,
api_base=api_base,
litellm_params=litellm_params,
headers=headers,
)
## LOGGING
logging_obj.pre_call(
input="",
api_key="",
additional_args={
"complete_input_dict": data,
"api_base": api_base,
"headers": headers,
},
)
try:
response = await async_httpx_client.get(
url=url, headers=headers, params=data
)
except Exception as e:
verbose_logger.exception(f"Error retrieving response: {e}")
raise self._handle_error(
e=e,
provider_config=responses_api_provider_config,
)
return responses_api_provider_config.transform_get_response_api_response(
raw_response=response,
logging_obj=logging_obj,
)
def create_file(
self,
create_file_data: CreateFileRequest,

View file

@ -7,7 +7,6 @@ from litellm._logging import verbose_logger
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import *
from litellm.types.responses.main import *
from litellm.types.router import GenericLiteLLMParams
from ..common_utils import OpenAIError
@ -111,7 +110,11 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
def get_complete_url(
self,
api_base: Optional[str],
api_key: Optional[str],
model: str,
optional_params: dict,
litellm_params: dict,
stream: Optional[bool] = None,
) -> str:
"""
Get the endpoint for OpenAI responses API
@ -187,7 +190,7 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
model_class = event_models.get(cast(ResponsesAPIStreamEvents, event_type))
if not model_class:
return GenericEvent
raise ValueError(f"Unknown event type: {event_type}")
return model_class
@ -214,75 +217,3 @@ class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
f"Error getting model info in OpenAIResponsesAPIConfig: {e}"
)
return False
#########################################################
########## DELETE RESPONSE API TRANSFORMATION ##############
#########################################################
def transform_delete_response_api_request(
self,
response_id: str,
api_base: str,
litellm_params: GenericLiteLLMParams,
headers: dict,
) -> Tuple[str, Dict]:
"""
Transform the delete response API request into a URL and data
OpenAI API expects the following request
- DELETE /v1/responses/{response_id}
"""
url = f"{api_base}/{response_id}"
data: Dict = {}
return url, data
def transform_delete_response_api_response(
self,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
) -> DeleteResponseResult:
"""
Transform the delete response API response into a DeleteResponseResult
"""
try:
raw_response_json = raw_response.json()
except Exception:
raise OpenAIError(
message=raw_response.text, status_code=raw_response.status_code
)
return DeleteResponseResult(**raw_response_json)
#########################################################
########## GET RESPONSE API TRANSFORMATION ###############
#########################################################
def transform_get_response_api_request(
self,
response_id: str,
api_base: str,
litellm_params: GenericLiteLLMParams,
headers: dict,
) -> Tuple[str, Dict]:
"""
Transform the get response API request into a URL and data
OpenAI API expects the following request
- GET /v1/responses/{response_id}
"""
url = f"{api_base}/{response_id}"
data: Dict = {}
return url, data
def transform_get_response_api_response(
self,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
) -> ResponsesAPIResponse:
"""
Transform the get response API response into a ResponsesAPIResponse
"""
try:
raw_response_json = raw_response.json()
except Exception:
raise OpenAIError(
message=raw_response.text, status_code=raw_response.status_code
)
return ResponsesAPIResponse(**raw_response_json)
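Unlike the Azure config earlier in this diff, the OpenAI transformations simply append the response id to the responses endpoint; a trivial sketch with made-up values:

```python
# Plain concatenation, mirroring the two transformations above. Values are placeholders.
api_base = "https://api.openai.com/v1/responses"
response_id = "resp_abc123"

delete_url = f"{api_base}/{response_id}"  # DELETE /v1/responses/{response_id}
get_url = f"{api_base}/{response_id}"     # GET    /v1/responses/{response_id}
print(delete_url)
print(get_url)
```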

View file

@ -37,7 +37,6 @@ class SagemakerConfig(BaseConfig):
"""
max_new_tokens: Optional[int] = None
max_completion_tokens: Optional[int] = None
top_p: Optional[float] = None
temperature: Optional[float] = None
return_full_text: Optional[bool] = None
@ -45,7 +44,6 @@ class SagemakerConfig(BaseConfig):
def __init__(
self,
max_new_tokens: Optional[int] = None,
max_completion_tokens: Optional[int] = None,
top_p: Optional[float] = None,
temperature: Optional[float] = None,
return_full_text: Optional[bool] = None,
@ -67,7 +65,7 @@ class SagemakerConfig(BaseConfig):
)
def get_supported_openai_params(self, model: str) -> List:
return ["stream", "temperature", "max_tokens", "max_completion_tokens", "top_p", "stop", "n"]
return ["stream", "temperature", "max_tokens", "top_p", "stop", "n"]
def map_openai_params(
self,
@ -104,8 +102,6 @@ class SagemakerConfig(BaseConfig):
if value == 0:
value = 1
optional_params["max_new_tokens"] = value
if param == "max_completion_tokens":
optional_params["max_new_tokens"] = value
non_default_params.pop("aws_sagemaker_allow_zero_temp", None)
return optional_params
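With the hunk above, `max_completion_tokens` becomes an accepted alias and, like `max_tokens`, is mapped onto SageMaker's `max_new_tokens`. A standalone sketch of that mapping (not litellm's class itself):

```python
# Mirrors the parameter mapping shown above; only the two max-token params are handled here.
def map_openai_params(non_default_params: dict) -> dict:
    optional_params: dict = {}
    for param, value in non_default_params.items():
        if param == "max_tokens":
            if value == 0:
                value = 1  # the hosted container rejects 0 new tokens
            optional_params["max_new_tokens"] = value
        if param == "max_completion_tokens":
            optional_params["max_new_tokens"] = value
    return optional_params

print(map_openai_params({"max_completion_tokens": 256}))  # {'max_new_tokens': 256}
```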

View file

@ -182,7 +182,6 @@ from .types.llms.openai import (
ChatCompletionPredictionContentParam,
ChatCompletionUserMessage,
HttpxBinaryResponseContent,
ImageGenerationRequestQuality,
)
from .types.utils import (
LITELLM_IMAGE_VARIATION_PROVIDERS,
@ -2689,9 +2688,9 @@ def completion( # type: ignore # noqa: PLR0915
"aws_region_name" not in optional_params
or optional_params["aws_region_name"] is None
):
optional_params["aws_region_name"] = (
aws_bedrock_client.meta.region_name
)
optional_params[
"aws_region_name"
] = aws_bedrock_client.meta.region_name
bedrock_route = BedrockModelInfo.get_bedrock_route(model)
if bedrock_route == "converse":
@ -4413,9 +4412,9 @@ def adapter_completion(
new_kwargs = translation_obj.translate_completion_input_params(kwargs=kwargs)
response: Union[ModelResponse, CustomStreamWrapper] = completion(**new_kwargs) # type: ignore
translated_response: Optional[Union[BaseModel, AdapterCompletionStreamWrapper]] = (
None
)
translated_response: Optional[
Union[BaseModel, AdapterCompletionStreamWrapper]
] = None
if isinstance(response, ModelResponse):
translated_response = translation_obj.translate_completion_output_params(
response=response
@ -4568,7 +4567,7 @@ def image_generation( # noqa: PLR0915
prompt: str,
model: Optional[str] = None,
n: Optional[int] = None,
quality: Optional[Union[str, ImageGenerationRequestQuality]] = None,
quality: Optional[str] = None,
response_format: Optional[str] = None,
size: Optional[str] = None,
style: Optional[str] = None,
@ -5835,9 +5834,9 @@ def stream_chunk_builder( # noqa: PLR0915
]
if len(content_chunks) > 0:
response["choices"][0]["message"]["content"] = (
processor.get_combined_content(content_chunks)
)
response["choices"][0]["message"][
"content"
] = processor.get_combined_content(content_chunks)
reasoning_chunks = [
chunk
@ -5848,9 +5847,9 @@ def stream_chunk_builder( # noqa: PLR0915
]
if len(reasoning_chunks) > 0:
response["choices"][0]["message"]["reasoning_content"] = (
processor.get_combined_reasoning_content(reasoning_chunks)
)
response["choices"][0]["message"][
"reasoning_content"
] = processor.get_combined_reasoning_content(reasoning_chunks)
audio_chunks = [
chunk

View file

@ -1437,76 +1437,6 @@
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai"
},
"gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 4.0054321e-8,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
"low/1024-x-1024/gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 1.0490417e-8,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
"medium/1024-x-1024/gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 4.0054321e-8,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
"high/1024-x-1024/gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 1.59263611e-7,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
"low/1024-x-1536/gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 1.0172526e-8,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
"medium/1024-x-1536/gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 4.0054321e-8,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
"high/1024-x-1536/gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 1.58945719e-7,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
"low/1536-x-1024/gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 1.0172526e-8,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
"medium/1536-x-1024/gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 4.0054321e-8,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
"high/1536-x-1024/gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 1.58945719e-7,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
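For orientation, the per-pixel rates in the gpt-image-1 entries above work out to roughly the following per-image cost, assuming the rate is applied to the generated image's pixel count (a back-of-the-envelope sketch, not an exact billing formula):

```python
# Medium quality, 1024-x-1024 entry from the pricing block above.
cost_per_pixel = 4.0054321e-8
pixels = 1024 * 1024
print(round(cost_per_pixel * pixels, 4))  # ~0.042 USD per generated image
```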
"gpt-4o-transcribe": {
"mode": "audio_transcription",
"input_cost_per_token": 0.0000025,
@ -1560,6 +1490,7 @@
"supports_prompt_caching": false,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_native_streaming": false,
"supports_reasoning": true
},
"azure/gpt-4o-audio-preview-2024-12-17": {
@ -7058,17 +6989,6 @@
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models",
"supports_tool_choice": true
},
"command-a-03-2025": {
"max_tokens": 8000,
"max_input_tokens": 256000,
"max_output_tokens": 8000,
"input_cost_per_token": 0.0000025,
"output_cost_per_token": 0.00001,
"litellm_provider": "cohere_chat",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": true
},
"command-r": {
"max_tokens": 4096,
"max_input_tokens": 128000,

@ -0,0 +1 @@
Subproject commit bf0485467c343957ba5c217db777f407b2e65453

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-205af899b895cbac.js" async=""></script><script src="/ui/_next/static/chunks/117-1c5bfc45bfc4237d.js" async=""></script><script src="/ui/_next/static/chunks/main-app-4f7318ae681a6d94.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/005c96178151b9fd.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[69451,[\"665\",\"static/chunks/3014691f-b7b79b78e27792f3.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-014374badc35fe9b.js\",\"261\",\"static/chunks/261-92d8946249b3296e.js\",\"899\",\"static/chunks/899-54ea329f41297bf0.js\",\"978\",\"static/chunks/978-3e0bd2034b623309.js\",\"250\",\"static/chunks/250-e4cc2ceb9ff1c37a.js\",\"699\",\"static/chunks/699-f4066c747670f979.js\",\"931\",\"static/chunks/app/page-9bd76bfe1ce0a80a.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"sh3mKTgIKifNl8lsgZ675\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/005c96178151b9fd.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-87ec698bfca6820e.js" async=""></script><script src="/ui/_next/static/chunks/main-app-4f7318ae681a6d94.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/3da1b0cfa7d4e161.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[25762,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-59f99bfbf676f282.js\",\"261\",\"static/chunks/261-57d48f76eec1e568.js\",\"899\",\"static/chunks/899-9af4feaf6f21839c.js\",\"860\",\"static/chunks/860-c1d8f124df444312.js\",\"250\",\"static/chunks/250-a927a558002d8fb9.js\",\"699\",\"static/chunks/699-99a8a36b70ac90c1.js\",\"931\",\"static/chunks/app/page-8f2fcc2af91a32fd.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"FPIQgzUY81b7nl8zNun4_\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/3da1b0cfa7d4e161.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[69451,["665","static/chunks/3014691f-b7b79b78e27792f3.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-014374badc35fe9b.js","261","static/chunks/261-92d8946249b3296e.js","899","static/chunks/899-54ea329f41297bf0.js","978","static/chunks/978-3e0bd2034b623309.js","250","static/chunks/250-e4cc2ceb9ff1c37a.js","699","static/chunks/699-f4066c747670f979.js","931","static/chunks/app/page-9bd76bfe1ce0a80a.js"],"default",1]
3:I[25762,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-59f99bfbf676f282.js","261","static/chunks/261-57d48f76eec1e568.js","899","static/chunks/899-9af4feaf6f21839c.js","860","static/chunks/860-c1d8f124df444312.js","250","static/chunks/250-a927a558002d8fb9.js","699","static/chunks/699-99a8a36b70ac90c1.js","931","static/chunks/app/page-8f2fcc2af91a32fd.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["sh3mKTgIKifNl8lsgZ675",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/005c96178151b9fd.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["FPIQgzUY81b7nl8zNun4_",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/3da1b0cfa7d4e161.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[52829,["42","static/chunks/42-014374badc35fe9b.js","261","static/chunks/261-92d8946249b3296e.js","250","static/chunks/250-e4cc2ceb9ff1c37a.js","699","static/chunks/699-f4066c747670f979.js","418","static/chunks/app/model_hub/page-3d2c374ee41b38e5.js"],"default",1]
3:I[52829,["42","static/chunks/42-59f99bfbf676f282.js","261","static/chunks/261-57d48f76eec1e568.js","250","static/chunks/250-a927a558002d8fb9.js","699","static/chunks/699-99a8a36b70ac90c1.js","418","static/chunks/app/model_hub/page-cde2fb783e81a6c1.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["sh3mKTgIKifNl8lsgZ675",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/005c96178151b9fd.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["FPIQgzUY81b7nl8zNun4_",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/3da1b0cfa7d4e161.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[12011,["665","static/chunks/3014691f-b7b79b78e27792f3.js","42","static/chunks/42-014374badc35fe9b.js","899","static/chunks/899-54ea329f41297bf0.js","250","static/chunks/250-e4cc2ceb9ff1c37a.js","461","static/chunks/app/onboarding/page-4809c2f644098f19.js"],"default",1]
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","42","static/chunks/42-59f99bfbf676f282.js","899","static/chunks/899-9af4feaf6f21839c.js","250","static/chunks/250-a927a558002d8fb9.js","461","static/chunks/app/onboarding/page-4f4c436bd23d48a0.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["sh3mKTgIKifNl8lsgZ675",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/005c96178151b9fd.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["FPIQgzUY81b7nl8zNun4_",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/3da1b0cfa7d4e161.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -33,7 +33,7 @@ model_list:
litellm_settings:
num_retries: 0
callbacks: ["datadog_llm_observability"]
callbacks: ["prometheus"]
check_provider_endpoint: true
files_settings:

View file

@ -687,8 +687,6 @@ class GenerateKeyResponse(KeyRequestBase):
token: Optional[str] = None
created_by: Optional[str] = None
updated_by: Optional[str] = None
created_at: Optional[datetime] = None
updated_at: Optional[datetime] = None
@model_validator(mode="before")
@classmethod

View file

@ -108,13 +108,7 @@ class ProxyBaseLLMRequestProcessing:
user_api_key_dict: UserAPIKeyAuth,
proxy_logging_obj: ProxyLogging,
proxy_config: ProxyConfig,
route_type: Literal[
"acompletion",
"aresponses",
"_arealtime",
"aget_responses",
"adelete_responses",
],
route_type: Literal["acompletion", "aresponses", "_arealtime"],
version: Optional[str] = None,
user_model: Optional[str] = None,
user_temperature: Optional[float] = None,
@ -184,13 +178,7 @@ class ProxyBaseLLMRequestProcessing:
request: Request,
fastapi_response: Response,
user_api_key_dict: UserAPIKeyAuth,
route_type: Literal[
"acompletion",
"aresponses",
"_arealtime",
"aget_responses",
"adelete_responses",
],
route_type: Literal["acompletion", "aresponses", "_arealtime"],
proxy_logging_obj: ProxyLogging,
general_settings: dict,
proxy_config: ProxyConfig,

View file

@ -1,5 +1,5 @@
import json
from typing import Any, Dict, List, Optional
from typing import Dict, List, Optional
import orjson
from fastapi import Request, UploadFile, status
@ -147,11 +147,11 @@ def check_file_size_under_limit(
if llm_router is not None and request_data["model"] in router_model_names:
try:
deployment: Optional[Deployment] = (
llm_router.get_deployment_by_model_group_name(
deployment: Optional[
Deployment
] = llm_router.get_deployment_by_model_group_name(
model_group_name=request_data["model"]
)
)
if (
deployment
and deployment.litellm_params is not None
@ -185,23 +185,3 @@ def check_file_size_under_limit(
)
return True
async def get_form_data(request: Request) -> Dict[str, Any]:
"""
Read form data from request
Handles when OpenAI SDKs pass form keys as `timestamp_granularities[]="word"` instead of `timestamp_granularities=["word", "sentence"]`
"""
form = await request.form()
form_data = dict(form)
parsed_form_data: dict[str, Any] = {}
for key, value in form_data.items():
# OpenAI SDKs pass form keys as `timestamp_granularities[]="word"` instead of `timestamp_granularities=["word", "sentence"]`
if key.endswith("[]"):
clean_key = key[:-2]
parsed_form_data.setdefault(clean_key, []).append(value)
else:
parsed_form_data[key] = value
return parsed_form_data
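A quick illustration of the bracket-suffix handling this helper performs, as a standalone sketch over a plain dict rather than a real `fastapi.Request`:

```python
from typing import Any, Dict

def parse_form_keys(form_data: Dict[str, Any]) -> Dict[str, Any]:
    parsed: Dict[str, Any] = {}
    for key, value in form_data.items():
        if key.endswith("[]"):  # e.g. "timestamp_granularities[]" from OpenAI SDKs
            parsed.setdefault(key[:-2], []).append(value)
        else:
            parsed[key] = value
    return parsed

print(parse_form_keys({"model": "whisper-1", "timestamp_granularities[]": "word"}))
# {'model': 'whisper-1', 'timestamp_granularities': ['word']}
```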

View file

@ -43,9 +43,6 @@ from litellm.types.proxy.management_endpoints.common_daily_activity import (
SpendAnalyticsPaginatedResponse,
SpendMetrics,
)
from litellm.types.proxy.management_endpoints.internal_user_endpoints import (
UserListResponse,
)
router = APIRouter()
@ -902,47 +899,15 @@ async def get_user_key_counts(
return result
def _validate_sort_params(
sort_by: Optional[str], sort_order: str
) -> Optional[Dict[str, str]]:
order_by: Dict[str, str] = {}
if sort_by is None:
return None
# Validate sort_by is a valid column
valid_columns = [
"user_id",
"user_email",
"created_at",
"spend",
"user_alias",
"user_role",
]
if sort_by not in valid_columns:
raise HTTPException(
status_code=400,
detail={
"error": f"Invalid sort column. Must be one of: {', '.join(valid_columns)}"
},
@router.get(
"/user/get_users",
tags=["Internal User management"],
dependencies=[Depends(user_api_key_auth)],
)
# Validate sort_order
if sort_order.lower() not in ["asc", "desc"]:
raise HTTPException(
status_code=400,
detail={"error": "Invalid sort order. Must be 'asc' or 'desc'"},
)
order_by[sort_by] = sort_order.lower()
return order_by
@router.get(
"/user/list",
tags=["Internal User management"],
dependencies=[Depends(user_api_key_auth)],
response_model=UserListResponse,
)
async def get_users(
role: Optional[str] = fastapi.Query(
@ -951,29 +916,15 @@ async def get_users(
user_ids: Optional[str] = fastapi.Query(
default=None, description="Get list of users by user_ids"
),
sso_user_ids: Optional[str] = fastapi.Query(
default=None, description="Get list of users by sso_user_id"
),
user_email: Optional[str] = fastapi.Query(
default=None, description="Filter users by partial email match"
),
team: Optional[str] = fastapi.Query(
default=None, description="Filter users by team id"
),
page: int = fastapi.Query(default=1, ge=1, description="Page number"),
page_size: int = fastapi.Query(
default=25, ge=1, le=100, description="Number of items per page"
),
sort_by: Optional[str] = fastapi.Query(
default=None,
description="Column to sort by (e.g. 'user_id', 'user_email', 'created_at', 'spend')",
),
sort_order: str = fastapi.Query(
default="asc", description="Sort order ('asc' or 'desc')"
),
):
"""
Get a paginated list of users with filtering and sorting options.
Get a paginated list of users, optionally filtered by role.
Used by the UI to populate the user lists.
Parameters:
role: Optional[str]
@ -984,20 +935,17 @@ async def get_users(
- internal_user_viewer
user_ids: Optional[str]
Get list of users by user_ids. Comma separated list of user_ids.
sso_user_ids: Optional[str]
Get list of users by sso_user_id. Comma separated list of sso_user_ids.
user_email: Optional[str]
Filter users by partial email match
team: Optional[str]
Filter users by team id. Will match if user has this team in their teams array.
page: int
The page number to return
page_size: int
The number of items per page
sort_by: Optional[str]
Column to sort by (e.g. 'user_id', 'user_email', 'created_at', 'spend')
sort_order: Optional[str]
Sort order ('asc' or 'desc')
Currently - admin-only endpoint.
Example curl:
```
http://0.0.0.0:4000/user/list?user_ids=default_user_id,693c1a4a-1cc0-4c7c-afe8-b5d2c8d52e17
```
"""
from litellm.proxy.proxy_server import prisma_client
@ -1010,57 +958,35 @@ async def get_users(
# Calculate skip and take for pagination
skip = (page - 1) * page_size
# Prepare the query conditions
# Build where conditions based on provided parameters
where_conditions: Dict[str, Any] = {}
if role:
where_conditions["user_role"] = role # Exact match instead of contains
where_conditions["user_role"] = {
"contains": role,
"mode": "insensitive", # Case-insensitive search
}
if user_ids and isinstance(user_ids, str):
user_id_list = [uid.strip() for uid in user_ids.split(",") if uid.strip()]
where_conditions["user_id"] = {
"in": user_id_list,
"in": user_id_list, # Now passing a list of strings as required by Prisma
}
if user_email is not None and isinstance(user_email, str):
where_conditions["user_email"] = {
"contains": user_email,
"mode": "insensitive", # Case-insensitive search
}
if team is not None and isinstance(team, str):
where_conditions["teams"] = {
"has": team # Array contains for string arrays in Prisma
}
if sso_user_ids is not None and isinstance(sso_user_ids, str):
sso_id_list = [sid.strip() for sid in sso_user_ids.split(",") if sid.strip()]
where_conditions["sso_user_id"] = {
"in": sso_id_list,
}
## Filter out any None / unset fastapi.Query values - e.g. where_conditions: {'user_email': {'contains': Query(None), 'mode': 'insensitive'}, 'teams': {'has': Query(None)}}
where_conditions = {k: v for k, v in where_conditions.items() if v is not None}
# Build order_by conditions
order_by: Optional[Dict[str, str]] = (
_validate_sort_params(sort_by, sort_order)
if sort_by is not None and isinstance(sort_by, str)
else None
)
users = await prisma_client.db.litellm_usertable.find_many(
users: Optional[
List[LiteLLM_UserTable]
] = await prisma_client.db.litellm_usertable.find_many(
where=where_conditions,
skip=skip,
take=page_size,
order=order_by
if order_by
else {"created_at": "desc"}, # Default to created_at desc if no sort specified
order={"created_at": "desc"},
)
# Get total count of user rows
total_count = await prisma_client.db.litellm_usertable.count(where=where_conditions)
total_count = await prisma_client.db.litellm_usertable.count(
where=where_conditions # type: ignore
)
# Get key count for each user
if users is not None:
@ -1083,7 +1009,7 @@ async def get_users(
LiteLLM_UserTableWithKeyCount(
**user.model_dump(), key_count=user_key_counts.get(user.user_id, 0)
)
)
) # Return full key object
else:
user_list = []
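As a reading aid, a sketch (illustrative values only; assumes Prisma's dict-style filters as used above) of the where clause and pagination values that a typical `/user/list` call assembles:

```python
# Hypothetical call: GET /user/list?role=internal_user&user_email=alice&team=team-1&page=2&page_size=25
where_conditions = {
    "user_role": "internal_user",                                # exact role match
    "user_email": {"contains": "alice", "mode": "insensitive"},  # partial, case-insensitive
    "teams": {"has": "team-1"},                                  # user's teams array contains team-1
}
skip = (2 - 1) * 25  # offset for page 2
take = 25            # page size

# users = await prisma_client.db.litellm_usertable.find_many(
#     where=where_conditions, skip=skip, take=take, order={"created_at": "desc"}
# )
```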

View file

@ -1347,13 +1347,10 @@ async def generate_key_helper_fn( # noqa: PLR0915
create_key_response = await prisma_client.insert_data(
data=key_data, table_name="key"
)
key_data["token_id"] = getattr(create_key_response, "token", None)
key_data["litellm_budget_table"] = getattr(
create_key_response, "litellm_budget_table", None
)
key_data["created_at"] = getattr(create_key_response, "created_at", None)
key_data["updated_at"] = getattr(create_key_response, "updated_at", None)
except Exception as e:
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.generate_key_helper_fn(): Exception occured - {}".format(

View file

@ -1,8 +1,16 @@
model_list:
- model_name: openai/*
- model_name: azure-computer-use-preview
litellm_params:
model: openai/*
api_key: os.environ/OPENAI_API_KEY
model: azure/computer-use-preview
api_key: mock-api-key
api_version: mock-api-version
api_base: https://mock-endpoint.openai.azure.com
- model_name: azure-computer-use-preview
litellm_params:
model: azure/computer-use-preview-2
api_key: mock-api-key-2
api_version: mock-api-version-2
api_base: https://mock-endpoint-2.openai.azure.com
router_settings:
optional_pre_call_checks: ["responses_api_deployment_check"]
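A hedged sketch of what this config exercises (assumes `router_settings` map one-to-one onto `litellm.Router` kwargs; keys and endpoints are the mock values above): with `responses_api_deployment_check` enabled, a follow-up request carrying `previous_response_id` should be pinned to the deployment that produced the original response.

```python
import asyncio

import litellm

async def main():
    router = litellm.Router(
        model_list=[
            {
                "model_name": "azure-computer-use-preview",
                "litellm_params": {
                    "model": "azure/computer-use-preview",
                    "api_key": "mock-api-key",
                    "api_version": "mock-api-version",
                    "api_base": "https://mock-endpoint.openai.azure.com",
                },
            },
            {
                "model_name": "azure-computer-use-preview",
                "litellm_params": {
                    "model": "azure/computer-use-preview-2",
                    "api_key": "mock-api-key-2",
                    "api_version": "mock-api-version-2",
                    "api_base": "https://mock-endpoint-2.openai.azure.com",
                },
            },
        ],
        optional_pre_call_checks=["responses_api_deployment_check"],
    )

    first = await router.aresponses(model="azure-computer-use-preview", input="hello")

    # The composite response id encodes which deployment served the request, so the
    # pre-call check should route this follow-up to the same deployment.
    await router.aresponses(
        model="azure-computer-use-preview",
        input="continue",
        previous_response_id=first.id,
    )

asyncio.run(main())
```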

View file

@ -179,7 +179,6 @@ from litellm.proxy.common_utils.html_forms.ui_login import html_form
from litellm.proxy.common_utils.http_parsing_utils import (
_read_request_body,
check_file_size_under_limit,
get_form_data,
)
from litellm.proxy.common_utils.load_config_utils import (
get_config_file_contents_from_gcs,
@ -805,9 +804,9 @@ model_max_budget_limiter = _PROXY_VirtualKeyModelMaxBudgetLimiter(
dual_cache=user_api_key_cache
)
litellm.logging_callback_manager.add_litellm_callback(model_max_budget_limiter)
redis_usage_cache: Optional[RedisCache] = (
None # redis cache used for tracking spend, tpm/rpm limits
)
redis_usage_cache: Optional[
RedisCache
] = None # redis cache used for tracking spend, tpm/rpm limits
user_custom_auth = None
user_custom_key_generate = None
user_custom_sso = None
@ -1133,9 +1132,9 @@ async def update_cache( # noqa: PLR0915
_id = "team_id:{}".format(team_id)
try:
# Fetch the existing cost for the given user
existing_spend_obj: Optional[LiteLLM_TeamTable] = (
await user_api_key_cache.async_get_cache(key=_id)
)
existing_spend_obj: Optional[
LiteLLM_TeamTable
] = await user_api_key_cache.async_get_cache(key=_id)
if existing_spend_obj is None:
# do nothing if team not in api key cache
return
@ -1297,7 +1296,7 @@ class ProxyConfig:
config=config, base_dir=os.path.dirname(os.path.abspath(file_path or ""))
)
# verbose_proxy_logger.debug(f"loaded config={json.dumps(config, indent=4)}")
verbose_proxy_logger.debug(f"loaded config={json.dumps(config, indent=4)}")
return config
def _process_includes(self, config: dict, base_dir: str) -> dict:
@ -2807,9 +2806,9 @@ async def initialize( # noqa: PLR0915
user_api_base = api_base
dynamic_config[user_model]["api_base"] = api_base
if api_version:
os.environ["AZURE_API_VERSION"] = (
api_version # set this for azure - litellm can read this from the env
)
os.environ[
"AZURE_API_VERSION"
] = api_version # set this for azure - litellm can read this from the env
if max_tokens: # model-specific param
dynamic_config[user_model]["max_tokens"] = max_tokens
if temperature: # model-specific param
@ -4121,7 +4120,7 @@ async def audio_transcriptions(
data: Dict = {}
try:
# Use orjson to parse JSON data, orjson speeds up requests significantly
form_data = await get_form_data(request)
form_data = await request.form()
data = {key: value for key, value in form_data.items() if key != "file"}
# Include original request and headers in the data
@ -7759,9 +7758,9 @@ async def get_config_list(
hasattr(sub_field_info, "description")
and sub_field_info.description is not None
):
nested_fields[idx].field_description = (
sub_field_info.description
)
nested_fields[
idx
].field_description = sub_field_info.description
idx += 1
_stored_in_db = None

View file

@ -106,50 +106,8 @@ async def get_response(
-H "Authorization: Bearer sk-1234"
```
"""
from litellm.proxy.proxy_server import (
_read_request_body,
general_settings,
llm_router,
proxy_config,
proxy_logging_obj,
select_data_generator,
user_api_base,
user_max_tokens,
user_model,
user_request_timeout,
user_temperature,
version,
)
data = await _read_request_body(request=request)
data["response_id"] = response_id
processor = ProxyBaseLLMRequestProcessing(data=data)
try:
return await processor.base_process_llm_request(
request=request,
fastapi_response=fastapi_response,
user_api_key_dict=user_api_key_dict,
route_type="aget_responses",
proxy_logging_obj=proxy_logging_obj,
llm_router=llm_router,
general_settings=general_settings,
proxy_config=proxy_config,
select_data_generator=select_data_generator,
model=None,
user_model=user_model,
user_temperature=user_temperature,
user_request_timeout=user_request_timeout,
user_max_tokens=user_max_tokens,
user_api_base=user_api_base,
version=version,
)
except Exception as e:
raise await processor._handle_llm_api_exception(
e=e,
user_api_key_dict=user_api_key_dict,
proxy_logging_obj=proxy_logging_obj,
version=version,
)
# TODO: Implement response retrieval logic
pass
@router.delete(
@ -178,50 +136,8 @@ async def delete_response(
-H "Authorization: Bearer sk-1234"
```
"""
from litellm.proxy.proxy_server import (
_read_request_body,
general_settings,
llm_router,
proxy_config,
proxy_logging_obj,
select_data_generator,
user_api_base,
user_max_tokens,
user_model,
user_request_timeout,
user_temperature,
version,
)
data = await _read_request_body(request=request)
data["response_id"] = response_id
processor = ProxyBaseLLMRequestProcessing(data=data)
try:
return await processor.base_process_llm_request(
request=request,
fastapi_response=fastapi_response,
user_api_key_dict=user_api_key_dict,
route_type="adelete_responses",
proxy_logging_obj=proxy_logging_obj,
llm_router=llm_router,
general_settings=general_settings,
proxy_config=proxy_config,
select_data_generator=select_data_generator,
model=None,
user_model=user_model,
user_temperature=user_temperature,
user_request_timeout=user_request_timeout,
user_max_tokens=user_max_tokens,
user_api_base=user_api_base,
version=version,
)
except Exception as e:
raise await processor._handle_llm_api_exception(
e=e,
user_api_key_dict=user_api_key_dict,
proxy_logging_obj=proxy_logging_obj,
version=version,
)
# TODO: Implement response deletion logic
pass
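For reference, a minimal client-side sketch of the two endpoints wired up above (assumes a proxy running locally on port 4000 and the example `sk-1234` key from the docstrings; the response id is a placeholder):

```python
import httpx

BASE_URL = "http://0.0.0.0:4000"               # assumed local proxy
HEADERS = {"Authorization": "Bearer sk-1234"}  # example key from the docstrings
response_id = "resp_abc123"                    # placeholder id from an earlier /v1/responses call

with httpx.Client(base_url=BASE_URL, headers=HEADERS) as client:
    # GET /v1/responses/{response_id} -> handled via base_process_llm_request, route_type="aget_responses"
    fetched = client.get(f"/v1/responses/{response_id}")
    print(fetched.json())

    # DELETE /v1/responses/{response_id} -> handled via base_process_llm_request, route_type="adelete_responses"
    deleted = client.delete(f"/v1/responses/{response_id}")
    print(deleted.json())
```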
@router.get(

View file

@ -47,8 +47,6 @@ async def route_request(
"amoderation",
"arerank",
"aresponses",
"aget_responses",
"adelete_responses",
"_arealtime", # private function for realtime API
],
):

View file

@ -1,7 +1,7 @@
import asyncio
import contextvars
from functools import partial
from typing import Any, Coroutine, Dict, Iterable, List, Literal, Optional, Union
from typing import Any, Dict, Iterable, List, Literal, Optional, Union
import httpx
@ -24,7 +24,6 @@ from litellm.types.llms.openai import (
ToolChoice,
ToolParam,
)
from litellm.types.responses.main import *
from litellm.types.router import GenericLiteLLMParams
from litellm.utils import ProviderConfigManager, client
@ -122,8 +121,7 @@ async def aresponses(
if isinstance(response, ResponsesAPIResponse):
response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
responses_api_response=response,
litellm_metadata=kwargs.get("litellm_metadata", {}),
custom_llm_provider=custom_llm_provider,
kwargs=kwargs,
)
return response
except Exception as e:
@ -255,15 +253,13 @@ def responses(
fake_stream=responses_api_provider_config.should_fake_stream(
model=model, stream=stream, custom_llm_provider=custom_llm_provider
),
litellm_metadata=kwargs.get("litellm_metadata", {}),
)
# Update the responses_api_response_id with the model_id
if isinstance(response, ResponsesAPIResponse):
response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
responses_api_response=response,
litellm_metadata=kwargs.get("litellm_metadata", {}),
custom_llm_provider=custom_llm_provider,
kwargs=kwargs,
)
return response
@ -275,347 +271,3 @@ def responses(
completion_kwargs=local_vars,
extra_kwargs=kwargs,
)
@client
async def adelete_responses(
response_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Optional[Dict[str, Any]] = None,
extra_query: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
# LiteLLM specific params,
custom_llm_provider: Optional[str] = None,
**kwargs,
) -> DeleteResponseResult:
"""
Async version of the DELETE Responses API
DELETE /v1/responses/{response_id} endpoint in the responses API
"""
local_vars = locals()
try:
loop = asyncio.get_event_loop()
kwargs["adelete_responses"] = True
# get custom llm provider from response_id
decoded_response_id: DecodedResponseId = (
ResponsesAPIRequestUtils._decode_responses_api_response_id(
response_id=response_id,
)
)
response_id = decoded_response_id.get("response_id") or response_id
custom_llm_provider = (
decoded_response_id.get("custom_llm_provider") or custom_llm_provider
)
func = partial(
delete_responses,
response_id=response_id,
custom_llm_provider=custom_llm_provider,
extra_headers=extra_headers,
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
**kwargs,
)
ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func)
init_response = await loop.run_in_executor(None, func_with_context)
if asyncio.iscoroutine(init_response):
response = await init_response
else:
response = init_response
return response
except Exception as e:
raise litellm.exception_type(
model=None,
custom_llm_provider=custom_llm_provider,
original_exception=e,
completion_kwargs=local_vars,
extra_kwargs=kwargs,
)
@client
def delete_responses(
response_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Optional[Dict[str, Any]] = None,
extra_query: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
# LiteLLM specific params,
custom_llm_provider: Optional[str] = None,
**kwargs,
) -> Union[DeleteResponseResult, Coroutine[Any, Any, DeleteResponseResult]]:
"""
Synchronous version of the DELETE Responses API
DELETE /v1/responses/{response_id} endpoint in the responses API
"""
local_vars = locals()
try:
litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj") # type: ignore
litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
_is_async = kwargs.pop("adelete_responses", False) is True
# get llm provider logic
litellm_params = GenericLiteLLMParams(**kwargs)
# get custom llm provider from response_id
decoded_response_id: DecodedResponseId = (
ResponsesAPIRequestUtils._decode_responses_api_response_id(
response_id=response_id,
)
)
response_id = decoded_response_id.get("response_id") or response_id
custom_llm_provider = (
decoded_response_id.get("custom_llm_provider") or custom_llm_provider
)
if custom_llm_provider is None:
raise ValueError("custom_llm_provider is required but passed as None")
# get provider config
responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
ProviderConfigManager.get_provider_responses_api_config(
model=None,
provider=litellm.LlmProviders(custom_llm_provider),
)
)
if responses_api_provider_config is None:
raise ValueError(
f"DELETE responses is not supported for {custom_llm_provider}"
)
local_vars.update(kwargs)
# Pre Call logging
litellm_logging_obj.update_environment_variables(
model=None,
optional_params={
"response_id": response_id,
},
litellm_params={
"litellm_call_id": litellm_call_id,
},
custom_llm_provider=custom_llm_provider,
)
# Call the handler with _is_async flag instead of directly calling the async handler
response = base_llm_http_handler.delete_response_api_handler(
response_id=response_id,
custom_llm_provider=custom_llm_provider,
responses_api_provider_config=responses_api_provider_config,
litellm_params=litellm_params,
logging_obj=litellm_logging_obj,
extra_headers=extra_headers,
extra_body=extra_body,
timeout=timeout or request_timeout,
_is_async=_is_async,
client=kwargs.get("client"),
)
return response
except Exception as e:
raise litellm.exception_type(
model=None,
custom_llm_provider=custom_llm_provider,
original_exception=e,
completion_kwargs=local_vars,
extra_kwargs=kwargs,
)
@client
async def aget_responses(
response_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Optional[Dict[str, Any]] = None,
extra_query: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
# LiteLLM specific params,
custom_llm_provider: Optional[str] = None,
**kwargs,
) -> ResponsesAPIResponse:
"""
Async: Fetch a response by its ID.
GET /v1/responses/{response_id} endpoint in the responses API
Args:
response_id: The ID of the response to fetch.
custom_llm_provider: Optional provider name. If not specified, will be decoded from response_id.
Returns:
The response object with complete information about the stored response.
"""
local_vars = locals()
try:
loop = asyncio.get_event_loop()
kwargs["aget_responses"] = True
# get custom llm provider from response_id
decoded_response_id: DecodedResponseId = (
ResponsesAPIRequestUtils._decode_responses_api_response_id(
response_id=response_id,
)
)
response_id = decoded_response_id.get("response_id") or response_id
custom_llm_provider = (
decoded_response_id.get("custom_llm_provider") or custom_llm_provider
)
func = partial(
get_responses,
response_id=response_id,
custom_llm_provider=custom_llm_provider,
extra_headers=extra_headers,
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
**kwargs,
)
ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func)
init_response = await loop.run_in_executor(None, func_with_context)
if asyncio.iscoroutine(init_response):
response = await init_response
else:
response = init_response
# Update the responses_api_response_id with the model_id
if isinstance(response, ResponsesAPIResponse):
response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
responses_api_response=response,
litellm_metadata=kwargs.get("litellm_metadata", {}),
custom_llm_provider=custom_llm_provider,
)
return response
except Exception as e:
raise litellm.exception_type(
model=None,
custom_llm_provider=custom_llm_provider,
original_exception=e,
completion_kwargs=local_vars,
extra_kwargs=kwargs,
)
@client
def get_responses(
response_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Optional[Dict[str, Any]] = None,
extra_query: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
# LiteLLM specific params,
custom_llm_provider: Optional[str] = None,
**kwargs,
) -> Union[ResponsesAPIResponse, Coroutine[Any, Any, ResponsesAPIResponse]]:
"""
Fetch a response by its ID.
GET /v1/responses/{response_id} endpoint in the responses API
Args:
response_id: The ID of the response to fetch.
custom_llm_provider: Optional provider name. If not specified, will be decoded from response_id.
Returns:
The response object with complete information about the stored response.
"""
local_vars = locals()
try:
litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj") # type: ignore
litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
_is_async = kwargs.pop("aget_responses", False) is True
# get llm provider logic
litellm_params = GenericLiteLLMParams(**kwargs)
# get custom llm provider from response_id
decoded_response_id: DecodedResponseId = (
ResponsesAPIRequestUtils._decode_responses_api_response_id(
response_id=response_id,
)
)
response_id = decoded_response_id.get("response_id") or response_id
custom_llm_provider = (
decoded_response_id.get("custom_llm_provider") or custom_llm_provider
)
if custom_llm_provider is None:
raise ValueError("custom_llm_provider is required but passed as None")
# get provider config
responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
ProviderConfigManager.get_provider_responses_api_config(
model=None,
provider=litellm.LlmProviders(custom_llm_provider),
)
)
if responses_api_provider_config is None:
raise ValueError(
f"GET responses is not supported for {custom_llm_provider}"
)
local_vars.update(kwargs)
# Pre Call logging
litellm_logging_obj.update_environment_variables(
model=None,
optional_params={
"response_id": response_id,
},
litellm_params={
"litellm_call_id": litellm_call_id,
},
custom_llm_provider=custom_llm_provider,
)
# Call the handler with _is_async flag instead of directly calling the async handler
response = base_llm_http_handler.get_responses(
response_id=response_id,
custom_llm_provider=custom_llm_provider,
responses_api_provider_config=responses_api_provider_config,
litellm_params=litellm_params,
logging_obj=litellm_logging_obj,
extra_headers=extra_headers,
extra_body=extra_body,
timeout=timeout or request_timeout,
_is_async=_is_async,
client=kwargs.get("client"),
)
# Update the responses_api_response_id with the model_id
if isinstance(response, ResponsesAPIResponse):
response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
responses_api_response=response,
litellm_metadata=kwargs.get("litellm_metadata", {}),
custom_llm_provider=custom_llm_provider,
)
return response
except Exception as e:
raise litellm.exception_type(
model=None,
custom_llm_provider=custom_llm_provider,
original_exception=e,
completion_kwargs=local_vars,
extra_kwargs=kwargs,
)
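A short usage sketch of the two SDK entry points defined above (ids are placeholders; `custom_llm_provider` can be omitted when the id is a LiteLLM composite id, since it is then decoded from the id itself):

```python
import asyncio

import litellm

async def main():
    # Placeholder id; for a raw provider id, pass custom_llm_provider explicitly.
    response = await litellm.aget_responses(
        response_id="resp_abc123",
        custom_llm_provider="openai",
    )
    print(response.id)

    result = await litellm.adelete_responses(
        response_id="resp_abc123",
        custom_llm_provider="openai",
    )
    print(result.deleted)

asyncio.run(main())
```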

View file

@ -1,7 +1,7 @@
import asyncio
import json
from datetime import datetime
from typing import Any, Dict, Optional
from typing import Optional
import httpx
@ -10,7 +10,6 @@ from litellm.litellm_core_utils.asyncify import run_async_function
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.litellm_core_utils.thread_pool_executor import executor
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.responses.utils import ResponsesAPIRequestUtils
from litellm.types.llms.openai import (
OutputTextDeltaEvent,
ResponseCompletedEvent,
@ -34,8 +33,6 @@ class BaseResponsesAPIStreamingIterator:
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
logging_obj: LiteLLMLoggingObj,
litellm_metadata: Optional[Dict[str, Any]] = None,
custom_llm_provider: Optional[str] = None,
):
self.response = response
self.model = model
@ -45,11 +42,7 @@ class BaseResponsesAPIStreamingIterator:
self.completed_response: Optional[ResponsesAPIStreamingResponse] = None
self.start_time = datetime.now()
# set request kwargs
self.litellm_metadata = litellm_metadata
self.custom_llm_provider = custom_llm_provider
def _process_chunk(self, chunk) -> Optional[ResponsesAPIStreamingResponse]:
def _process_chunk(self, chunk):
"""Process a single chunk of data from the stream"""
if not chunk:
return None
@ -77,17 +70,6 @@ class BaseResponsesAPIStreamingIterator:
logging_obj=self.logging_obj,
)
)
# if "response" in parsed_chunk, then encode litellm specific information like custom_llm_provider
response_object = getattr(openai_responses_api_chunk, "response", None)
if response_object:
response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
responses_api_response=response_object,
litellm_metadata=self.litellm_metadata,
custom_llm_provider=self.custom_llm_provider,
)
setattr(openai_responses_api_chunk, "response", response)
# Store the completed response
if (
openai_responses_api_chunk
@ -120,17 +102,8 @@ class ResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
logging_obj: LiteLLMLoggingObj,
litellm_metadata: Optional[Dict[str, Any]] = None,
custom_llm_provider: Optional[str] = None,
):
super().__init__(
response,
model,
responses_api_provider_config,
logging_obj,
litellm_metadata,
custom_llm_provider,
)
super().__init__(response, model, responses_api_provider_config, logging_obj)
self.stream_iterator = response.aiter_lines()
def __aiter__(self):
@ -190,17 +163,8 @@ class SyncResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
logging_obj: LiteLLMLoggingObj,
litellm_metadata: Optional[Dict[str, Any]] = None,
custom_llm_provider: Optional[str] = None,
):
super().__init__(
response,
model,
responses_api_provider_config,
logging_obj,
litellm_metadata,
custom_llm_provider,
)
super().__init__(response, model, responses_api_provider_config, logging_obj)
self.stream_iterator = response.iter_lines()
def __iter__(self):
@ -264,16 +228,12 @@ class MockResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
logging_obj: LiteLLMLoggingObj,
litellm_metadata: Optional[Dict[str, Any]] = None,
custom_llm_provider: Optional[str] = None,
):
super().__init__(
response=response,
model=model,
responses_api_provider_config=responses_api_provider_config,
logging_obj=logging_obj,
litellm_metadata=litellm_metadata,
custom_llm_provider=custom_llm_provider,
)
# one-time transform

View file

@ -1,5 +1,5 @@
import base64
from typing import Any, Dict, Optional, Union, cast, get_type_hints
from typing import Any, Dict, Optional, Tuple, Union, cast, get_type_hints
import litellm
from litellm._logging import verbose_logger
@ -9,7 +9,6 @@ from litellm.types.llms.openai import (
ResponsesAPIOptionalRequestParams,
ResponsesAPIResponse,
)
from litellm.types.responses.main import DecodedResponseId
from litellm.types.utils import SpecialEnums, Usage
@ -84,36 +83,30 @@ class ResponsesAPIRequestUtils:
@staticmethod
def _update_responses_api_response_id_with_model_id(
responses_api_response: ResponsesAPIResponse,
custom_llm_provider: Optional[str],
litellm_metadata: Optional[Dict[str, Any]] = None,
kwargs: Dict[str, Any],
) -> ResponsesAPIResponse:
"""
Update the responses_api_response_id with model_id and custom_llm_provider
This builds a composite ID containing the custom LLM provider, model ID, and original response ID
"""
litellm_metadata = litellm_metadata or {}
"""Update the responses_api_response_id with the model_id"""
litellm_metadata: Dict[str, Any] = kwargs.get("litellm_metadata", {}) or {}
model_info: Dict[str, Any] = litellm_metadata.get("model_info", {}) or {}
model_id = model_info.get("id")
updated_id = ResponsesAPIRequestUtils._build_responses_api_response_id(
model_id=model_id,
custom_llm_provider=custom_llm_provider,
response_id=responses_api_response.id,
)
responses_api_response.id = updated_id
return responses_api_response
@staticmethod
def _build_responses_api_response_id(
custom_llm_provider: Optional[str],
model_id: Optional[str],
response_id: str,
) -> str:
"""Build the responses_api_response_id"""
if model_id is None:
return response_id
assembled_id: str = str(
SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR.value
).format(custom_llm_provider, model_id, response_id)
).format(model_id, response_id)
base64_encoded_id: str = base64.b64encode(assembled_id.encode("utf-8")).decode(
"utf-8"
)
@ -122,12 +115,12 @@ class ResponsesAPIRequestUtils:
@staticmethod
def _decode_responses_api_response_id(
response_id: str,
) -> DecodedResponseId:
) -> Tuple[Optional[str], str]:
"""
Decode the responses_api_response_id
Returns:
DecodedResponseId: Structured tuple with custom_llm_provider, model_id, and response_id
Tuple of model_id, response_id (from upstream provider)
"""
try:
# Remove prefix and decode
@ -136,55 +129,16 @@ class ResponsesAPIRequestUtils:
# Parse components using known prefixes
if ";" not in decoded_id:
return DecodedResponseId(
custom_llm_provider=None,
model_id=None,
response_id=response_id,
)
return None, response_id
parts = decoded_id.split(";")
# Format: litellm:custom_llm_provider:{};model_id:{};response_id:{}
custom_llm_provider = None
model_id = None
if (
len(parts) >= 3
): # Full format with custom_llm_provider, model_id, and response_id
custom_llm_provider_part = parts[0]
model_id_part = parts[1]
response_part = parts[2]
custom_llm_provider = custom_llm_provider_part.replace(
"litellm:custom_llm_provider:", ""
)
model_id = model_id_part.replace("model_id:", "")
model_part, response_part = decoded_id.split(";", 1)
model_id = model_part.replace("litellm:model_id:", "")
decoded_response_id = response_part.replace("response_id:", "")
else:
decoded_response_id = response_id
return DecodedResponseId(
custom_llm_provider=custom_llm_provider,
model_id=model_id,
response_id=decoded_response_id,
)
return model_id, decoded_response_id
except Exception as e:
verbose_logger.debug(f"Error decoding response_id '{response_id}': {e}")
return DecodedResponseId(
custom_llm_provider=None,
model_id=None,
response_id=response_id,
)
@staticmethod
def get_model_id_from_response_id(response_id: Optional[str]) -> Optional[str]:
"""Get the model_id from the response_id"""
if response_id is None:
return None
decoded_response_id = (
ResponsesAPIRequestUtils._decode_responses_api_response_id(response_id)
)
return decoded_response_id.get("model_id") or None
return None, response_id
class ResponseAPILoggingUtils:
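A worked example of the composite-id round trip implemented above (values are illustrative; the production helpers also wrap/strip an outer id prefix around the base64 payload, omitted here):

```python
import base64

# Assemble: litellm:custom_llm_provider:{};model_id:{};response_id:{}
assembled = "litellm:custom_llm_provider:azure;model_id:model-123;response_id:resp_abc"
encoded = base64.b64encode(assembled.encode("utf-8")).decode("utf-8")
# `encoded` is (modulo the outer prefix) what callers see as ResponsesAPIResponse.id

# Decode: base64-decode, split on ";", strip the known prefixes
decoded = base64.b64decode(encoded.encode("utf-8")).decode("utf-8")
provider_part, model_part, response_part = decoded.split(";")
assert provider_part.replace("litellm:custom_llm_provider:", "") == "azure"
assert model_part.replace("model_id:", "") == "model-123"
assert response_part.replace("response_id:", "") == "resp_abc"
```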

View file

@ -739,12 +739,6 @@ class Router:
litellm.afile_content, call_type="afile_content"
)
self.responses = self.factory_function(litellm.responses, call_type="responses")
self.aget_responses = self.factory_function(
litellm.aget_responses, call_type="aget_responses"
)
self.adelete_responses = self.factory_function(
litellm.adelete_responses, call_type="adelete_responses"
)
def validate_fallbacks(self, fallback_param: Optional[List]):
"""
@ -3087,8 +3081,6 @@ class Router:
"anthropic_messages",
"aresponses",
"responses",
"aget_responses",
"adelete_responses",
"afile_delete",
"afile_content",
] = "assistants",
@ -3143,11 +3135,6 @@ class Router:
original_function=original_function,
**kwargs,
)
elif call_type in ("aget_responses", "adelete_responses"):
return await self._init_responses_api_endpoints(
original_function=original_function,
**kwargs,
)
elif call_type in ("afile_delete", "afile_content"):
return await self._ageneric_api_call_with_fallbacks(
original_function=original_function,
@ -3158,28 +3145,6 @@ class Router:
return async_wrapper
async def _init_responses_api_endpoints(
self,
original_function: Callable,
**kwargs,
):
"""
Initialize the Responses API endpoints on the router.
GET and DELETE Responses API requests encode the model_id in the response_id; this function decodes the response_id and sets the model to the decoded model_id.
"""
from litellm.responses.utils import ResponsesAPIRequestUtils
model_id = ResponsesAPIRequestUtils.get_model_id_from_response_id(
kwargs.get("response_id")
)
if model_id is not None:
kwargs["model"] = model_id
return await self._ageneric_api_call_with_fallbacks(
original_function=original_function,
**kwargs,
)
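In practice this means the router-level GET/DELETE helpers need no explicit `model` argument when given a composite id, e.g. (sketch, placeholder argument names):

```python
async def fetch_then_delete(router, composite_response_id: str):
    # _init_responses_api_endpoints decodes the embedded model_id from the composite id
    # and sets kwargs["model"] before falling through to _ageneric_api_call_with_fallbacks.
    fetched = await router.aget_responses(response_id=composite_response_id)
    deleted = await router.adelete_responses(response_id=composite_response_id)
    return fetched, deleted
```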
async def _pass_through_assistants_endpoint_factory(
self,
original_function: Callable,

View file

@ -31,10 +31,11 @@ class ResponsesApiDeploymentCheck(CustomLogger):
if previous_response_id is None:
return healthy_deployments
decoded_response = ResponsesAPIRequestUtils._decode_responses_api_response_id(
model_id, response_id = (
ResponsesAPIRequestUtils._decode_responses_api_response_id(
response_id=previous_response_id,
)
model_id = decoded_response.get("model_id")
)
if model_id is None:
return healthy_deployments

View file

@ -8,9 +8,7 @@ from typing import Any, Dict, List, Literal, Optional, TypedDict
class InputMeta(TypedDict):
messages: List[
Dict[str, str]
] # Relevant Issue: https://github.com/BerriAI/litellm/issues/9494
messages: List[Any]
class OutputMeta(TypedDict):

View file

@ -50,7 +50,7 @@ from openai.types.responses.response_create_params import (
ToolParam,
)
from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall
from pydantic import BaseModel, ConfigDict, Discriminator, Field, PrivateAttr
from pydantic import BaseModel, Discriminator, Field, PrivateAttr
from typing_extensions import Annotated, Dict, Required, TypedDict, override
from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject
@ -824,12 +824,12 @@ class OpenAIChatCompletionChunk(ChatCompletionChunk):
class Hyperparameters(BaseModel):
batch_size: Optional[Union[str, int]] = None # "Number of examples in each batch."
learning_rate_multiplier: Optional[Union[str, float]] = (
None # Scaling factor for the learning rate
)
n_epochs: Optional[Union[str, int]] = (
None # "The number of epochs to train the model for"
)
learning_rate_multiplier: Optional[
Union[str, float]
] = None # Scaling factor for the learning rate
n_epochs: Optional[
Union[str, int]
] = None # "The number of epochs to train the model for"
class FineTuningJobCreate(BaseModel):
@ -856,18 +856,18 @@ class FineTuningJobCreate(BaseModel):
model: str # "The name of the model to fine-tune."
training_file: str # "The ID of an uploaded file that contains training data."
hyperparameters: Optional[Hyperparameters] = (
None # "The hyperparameters used for the fine-tuning job."
)
suffix: Optional[str] = (
None # "A string of up to 18 characters that will be added to your fine-tuned model name."
)
validation_file: Optional[str] = (
None # "The ID of an uploaded file that contains validation data."
)
integrations: Optional[List[str]] = (
None # "A list of integrations to enable for your fine-tuning job."
)
hyperparameters: Optional[
Hyperparameters
] = None # "The hyperparameters used for the fine-tuning job."
suffix: Optional[
str
] = None # "A string of up to 18 characters that will be added to your fine-tuned model name."
validation_file: Optional[
str
] = None # "The ID of an uploaded file that contains validation data."
integrations: Optional[
List[str]
] = None # "A list of integrations to enable for your fine-tuning job."
seed: Optional[int] = None # "The seed controls the reproducibility of the job."
@ -1013,9 +1013,6 @@ class ResponsesAPIStreamEvents(str, Enum):
RESPONSE_FAILED = "response.failed"
RESPONSE_INCOMPLETE = "response.incomplete"
# Part added
RESPONSE_PART_ADDED = "response.reasoning_summary_part.added"
# Output item events
OUTPUT_ITEM_ADDED = "response.output_item.added"
OUTPUT_ITEM_DONE = "response.output_item.done"
@ -1203,12 +1200,6 @@ class ErrorEvent(BaseLiteLLMOpenAIResponseObject):
param: Optional[str]
class GenericEvent(BaseLiteLLMOpenAIResponseObject):
type: str
model_config = ConfigDict(extra="allow", protected_namespaces=())
# Union type for all possible streaming responses
ResponsesAPIStreamingResponse = Annotated[
Union[
@ -1235,7 +1226,6 @@ ResponsesAPIStreamingResponse = Annotated[
WebSearchCallSearchingEvent,
WebSearchCallCompletedEvent,
ErrorEvent,
GenericEvent,
],
Discriminator("type"),
]
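A small sketch of why `GenericEvent` closes out this union (import path and payload assumed for illustration): event types without a dedicated model, such as `response.reasoning_summary_part.added`, can still be parsed instead of failing discriminator validation.

```python
from litellm.types.llms.openai import GenericEvent  # assumed import path

# Hypothetical chunk whose type has no dedicated event model
chunk = {
    "type": "response.reasoning_summary_part.added",
    "item_id": "rs_123",
    "summary_index": 0,
}

event = GenericEvent(**chunk)     # extra="allow" keeps the unknown fields
print(event.type, event.item_id)  # -> response.reasoning_summary_part.added rs_123
```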
@ -1259,12 +1249,3 @@ class OpenAIRealtimeStreamResponseBaseObject(TypedDict):
OpenAIRealtimeStreamList = List[
Union[OpenAIRealtimeStreamResponseBaseObject, OpenAIRealtimeStreamSessionEvents]
]
class ImageGenerationRequestQuality(str, Enum):
LOW = "low"
MEDIUM = "medium"
HIGH = "high"
AUTO = "auto"
STANDARD = "standard"
HD = "hd"

View file

@ -1,18 +0,0 @@
from typing import Any, Dict, List, Literal, Optional, Union
from fastapi import HTTPException
from pydantic import BaseModel, EmailStr
from litellm.proxy._types import LiteLLM_UserTableWithKeyCount
class UserListResponse(BaseModel):
"""
Response model for the user list endpoint
"""
users: List[LiteLLM_UserTableWithKeyCount]
total: int
page: int
page_size: int
total_pages: int

View file

@ -1,6 +1,5 @@
from typing import Literal
from pydantic import PrivateAttr
from typing_extensions import Any, List, Optional, TypedDict
from litellm.types.llms.base import BaseLiteLLMOpenAIResponseObject
@ -47,30 +46,3 @@ class GenericResponseOutputItem(BaseLiteLLMOpenAIResponseObject):
status: str # "completed", "in_progress", etc.
role: str # "assistant", "user", etc.
content: List[OutputText]
class DeleteResponseResult(BaseLiteLLMOpenAIResponseObject):
"""
Result of a delete response request
{
"id": "resp_6786a1bec27481909a17d673315b29f6",
"object": "response",
"deleted": true
}
"""
id: Optional[str]
object: Optional[str]
deleted: Optional[bool]
# Define private attributes using PrivateAttr
_hidden_params: dict = PrivateAttr(default_factory=dict)
class DecodedResponseId(TypedDict, total=False):
"""Structure representing a decoded response ID"""
custom_llm_provider: Optional[str]
model_id: Optional[str]
response_id: str

View file

@ -2254,9 +2254,7 @@ class SpecialEnums(Enum):
LITELM_MANAGED_FILE_ID_PREFIX = "litellm_proxy"
LITELLM_MANAGED_FILE_COMPLETE_STR = "litellm_proxy:{};unified_id,{}"
LITELLM_MANAGED_RESPONSE_COMPLETE_STR = (
"litellm:custom_llm_provider:{};model_id:{};response_id:{}"
)
LITELLM_MANAGED_RESPONSE_COMPLETE_STR = "litellm:model_id:{};response_id:{}"
LLMResponseTypes = Union[

View file

@ -180,18 +180,10 @@ from litellm.types.utils import (
all_litellm_params,
)
try:
# Python 3.9+
with resources.files("litellm.litellm_core_utils.tokenizers").joinpath(
"anthropic_tokenizer.json"
).open("r") as f:
json_data = json.load(f)
except (ImportError, AttributeError, TypeError):
with resources.open_text(
"litellm.litellm_core_utils.tokenizers", "anthropic_tokenizer.json"
) as f:
json_data = json.load(f)
# Convert to str (if necessary)
claude_json_str = json.dumps(json_data)
import importlib.metadata
@ -524,9 +516,9 @@ def function_setup( # noqa: PLR0915
function_id: Optional[str] = kwargs["id"] if "id" in kwargs else None
## DYNAMIC CALLBACKS ##
dynamic_callbacks: Optional[List[Union[str, Callable, CustomLogger]]] = (
kwargs.pop("callbacks", None)
)
dynamic_callbacks: Optional[
List[Union[str, Callable, CustomLogger]]
] = kwargs.pop("callbacks", None)
all_callbacks = get_dynamic_callbacks(dynamic_callbacks=dynamic_callbacks)
if len(all_callbacks) > 0:
@ -1210,9 +1202,9 @@ def client(original_function): # noqa: PLR0915
exception=e,
retry_policy=kwargs.get("retry_policy"),
)
kwargs["retry_policy"] = (
reset_retry_policy()
) # prevent infinite loops
kwargs[
"retry_policy"
] = reset_retry_policy() # prevent infinite loops
litellm.num_retries = (
None # set retries to None to prevent infinite loops
)
@ -3036,16 +3028,16 @@ def get_optional_params( # noqa: PLR0915
True # so that main.py adds the function call to the prompt
)
if "tools" in non_default_params:
optional_params["functions_unsupported_model"] = (
non_default_params.pop("tools")
)
optional_params[
"functions_unsupported_model"
] = non_default_params.pop("tools")
non_default_params.pop(
"tool_choice", None
) # causes ollama requests to hang
elif "functions" in non_default_params:
optional_params["functions_unsupported_model"] = (
non_default_params.pop("functions")
)
optional_params[
"functions_unsupported_model"
] = non_default_params.pop("functions")
elif (
litellm.add_function_to_prompt
): # if user opts to add it to prompt instead
@ -3068,11 +3060,11 @@ def get_optional_params( # noqa: PLR0915
if "response_format" in non_default_params:
if provider_config is not None:
non_default_params["response_format"] = (
provider_config.get_json_schema_from_pydantic_object(
non_default_params[
"response_format"
] = provider_config.get_json_schema_from_pydantic_object(
response_format=non_default_params["response_format"]
)
)
else:
non_default_params["response_format"] = type_to_response_format_param(
response_format=non_default_params["response_format"]
@ -4087,9 +4079,9 @@ def _count_characters(text: str) -> int:
def get_response_string(response_obj: Union[ModelResponse, ModelResponseStream]) -> str:
_choices: Union[List[Union[Choices, StreamingChoices]], List[StreamingChoices]] = (
response_obj.choices
)
_choices: Union[
List[Union[Choices, StreamingChoices]], List[StreamingChoices]
] = response_obj.choices
response_str = ""
for choice in _choices:
@ -6633,8 +6625,8 @@ class ProviderConfigManager:
@staticmethod
def get_provider_responses_api_config(
model: str,
provider: LlmProviders,
model: Optional[str] = None,
) -> Optional[BaseResponsesAPIConfig]:
if litellm.LlmProviders.OPENAI == provider:
return litellm.OpenAIResponsesAPIConfig()

View file

@ -1437,76 +1437,6 @@
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai"
},
"gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 4.0054321e-8,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
"low/1024-x-1024/gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 1.0490417e-8,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
"medium/1024-x-1024/gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 4.0054321e-8,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
"high/1024-x-1024/gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 1.59263611e-7,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
"low/1024-x-1536/gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 1.0172526e-8,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
"medium/1024-x-1536/gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 4.0054321e-8,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
"high/1024-x-1536/gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 1.58945719e-7,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
"low/1536-x-1024/gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 1.0172526e-8,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
"medium/1536-x-1024/gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 4.0054321e-8,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
"high/1536-x-1024/gpt-image-1": {
"mode": "image_generation",
"input_cost_per_pixel": 1.58945719e-7,
"output_cost_per_pixel": 0.0,
"litellm_provider": "openai",
"supported_endpoints": ["/v1/images/generations"]
},
"gpt-4o-transcribe": {
"mode": "audio_transcription",
"input_cost_per_token": 0.0000025,
@ -1560,6 +1490,7 @@
"supports_prompt_caching": false,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_native_streaming": false,
"supports_reasoning": true
},
"azure/gpt-4o-audio-preview-2024-12-17": {
@ -7058,17 +6989,6 @@
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models",
"supports_tool_choice": true
},
"command-a-03-2025": {
"max_tokens": 8000,
"max_input_tokens": 256000,
"max_output_tokens": 8000,
"input_cost_per_token": 0.0000025,
"output_cost_per_token": 0.00001,
"litellm_provider": "cohere_chat",
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": true
},
"command-r": {
"max_tokens": 4096,
"max_input_tokens": 128000,

353
poetry.lock generated
View file

@ -548,7 +548,7 @@ version = "3.4.1"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
optional = false
python-versions = ">=3.7"
groups = ["main", "dev", "proxy-dev"]
groups = ["main"]
files = [
{file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"},
{file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"},
@ -742,24 +742,6 @@ ssh = ["bcrypt (>=3.1.5)"]
test = ["certifi", "cryptography-vectors (==43.0.3)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"]
test-randomorder = ["pytest-randomly"]
[[package]]
name = "deprecated"
version = "1.2.18"
description = "Python @deprecated decorator to deprecate old python classes, functions or methods."
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7"
groups = ["dev", "proxy-dev"]
files = [
{file = "Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec"},
{file = "deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d"},
]
[package.dependencies]
wrapt = ">=1.10,<2"
[package.extras]
dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "setuptools ; python_version >= \"3.12\"", "tox"]
[[package]]
name = "distro"
version = "1.9.0"
@ -1134,14 +1116,14 @@ protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4
name = "googleapis-common-protos"
version = "1.70.0"
description = "Common protobufs used in Google APIs"
optional = false
optional = true
python-versions = ">=3.7"
groups = ["main", "dev", "proxy-dev"]
groups = ["main"]
markers = "extra == \"extra-proxy\""
files = [
{file = "googleapis_common_protos-1.70.0-py3-none-any.whl", hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8"},
{file = "googleapis_common_protos-1.70.0.tar.gz", hash = "sha256:0e1b44e0ea153e6594f9f394fef15193a68aaaea2d843f83e2742717ca753257"},
]
markers = {main = "extra == \"extra-proxy\""}
[package.dependencies]
grpcio = {version = ">=1.44.0,<2.0.0", optional = true, markers = "extra == \"grpc\""}
@ -1172,9 +1154,10 @@ protobuf = ">=3.20.2,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4
name = "grpcio"
version = "1.70.0"
description = "HTTP/2-based RPC framework"
optional = false
optional = true
python-versions = ">=3.8"
groups = ["main", "dev", "proxy-dev"]
groups = ["main"]
markers = "python_version < \"3.10\" and extra == \"extra-proxy\""
files = [
{file = "grpcio-1.70.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:95469d1977429f45fe7df441f586521361e235982a0b39e33841549143ae2851"},
{file = "grpcio-1.70.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:ed9718f17fbdb472e33b869c77a16d0b55e166b100ec57b016dc7de9c8d236bf"},
@ -1232,7 +1215,6 @@ files = [
{file = "grpcio-1.70.0-cp39-cp39-win_amd64.whl", hash = "sha256:a31d7e3b529c94e930a117b2175b2efd179d96eb3c7a21ccb0289a8ab05b645c"},
{file = "grpcio-1.70.0.tar.gz", hash = "sha256:8d1584a68d5922330025881e63a6c1b54cc8117291d382e4fa69339b6d914c56"},
]
markers = {main = "python_version < \"3.10\" and extra == \"extra-proxy\"", dev = "python_version < \"3.10\"", proxy-dev = "python_version < \"3.10\""}
[package.extras]
protobuf = ["grpcio-tools (>=1.70.0)"]
@ -1241,9 +1223,10 @@ protobuf = ["grpcio-tools (>=1.70.0)"]
name = "grpcio"
version = "1.71.0"
description = "HTTP/2-based RPC framework"
optional = false
optional = true
python-versions = ">=3.9"
groups = ["main", "dev", "proxy-dev"]
groups = ["main"]
markers = "python_version >= \"3.10\" and extra == \"extra-proxy\""
files = [
{file = "grpcio-1.71.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:c200cb6f2393468142eb50ab19613229dcc7829b5ccee8b658a36005f6669fdd"},
{file = "grpcio-1.71.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:b2266862c5ad664a380fbbcdbdb8289d71464c42a8c29053820ee78ba0119e5d"},
@ -1297,28 +1280,45 @@ files = [
{file = "grpcio-1.71.0-cp39-cp39-win_amd64.whl", hash = "sha256:63e41b91032f298b3e973b3fa4093cbbc620c875e2da7b93e249d4728b54559a"},
{file = "grpcio-1.71.0.tar.gz", hash = "sha256:2b85f7820475ad3edec209d3d89a7909ada16caab05d3f2e08a7e8ae3200a55c"},
]
markers = {main = "python_version >= \"3.10\" and extra == \"extra-proxy\"", dev = "python_version >= \"3.10\"", proxy-dev = "python_version >= \"3.10\""}
[package.extras]
protobuf = ["grpcio-tools (>=1.71.0)"]
[[package]]
name = "grpcio-status"
version = "1.62.3"
version = "1.70.0"
description = "Status proto mapping for gRPC"
optional = true
python-versions = ">=3.6"
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"extra-proxy\""
markers = "python_version < \"3.10\" and extra == \"extra-proxy\""
files = [
{file = "grpcio-status-1.62.3.tar.gz", hash = "sha256:289bdd7b2459794a12cf95dc0cb727bd4a1742c37bd823f760236c937e53a485"},
{file = "grpcio_status-1.62.3-py3-none-any.whl", hash = "sha256:f9049b762ba8de6b1086789d8315846e094edac2c50beaf462338b301a8fd4b8"},
{file = "grpcio_status-1.70.0-py3-none-any.whl", hash = "sha256:fc5a2ae2b9b1c1969cc49f3262676e6854aa2398ec69cb5bd6c47cd501904a85"},
{file = "grpcio_status-1.70.0.tar.gz", hash = "sha256:0e7b42816512433b18b9d764285ff029bde059e9d41f8fe10a60631bd8348101"},
]
[package.dependencies]
googleapis-common-protos = ">=1.5.5"
grpcio = ">=1.62.3"
protobuf = ">=4.21.6"
grpcio = ">=1.70.0"
protobuf = ">=5.26.1,<6.0dev"
[[package]]
name = "grpcio-status"
version = "1.71.0"
description = "Status proto mapping for gRPC"
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "extra == \"extra-proxy\" and python_version >= \"3.10\""
files = [
{file = "grpcio_status-1.71.0-py3-none-any.whl", hash = "sha256:843934ef8c09e3e858952887467f8256aac3910c55f077a359a65b2b3cde3e68"},
{file = "grpcio_status-1.71.0.tar.gz", hash = "sha256:11405fed67b68f406b3f3c7c5ae5104a79d2d309666d10d61b152e91d28fb968"},
]
[package.dependencies]
googleapis-common-protos = ">=1.5.5"
grpcio = ">=1.71.0"
protobuf = ">=5.26.1,<6.0dev"
[[package]]
name = "gunicorn"
@ -1550,23 +1550,27 @@ all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2
[[package]]
name = "importlib-metadata"
version = "7.1.0"
version = "8.5.0"
description = "Read metadata from Python packages"
optional = false
python-versions = ">=3.8"
groups = ["main", "dev", "proxy-dev"]
groups = ["main"]
files = [
{file = "importlib_metadata-7.1.0-py3-none-any.whl", hash = "sha256:30962b96c0c223483ed6cc7280e7f0199feb01a0e40cfae4d4450fc6fab1f570"},
{file = "importlib_metadata-7.1.0.tar.gz", hash = "sha256:b78938b926ee8d5f020fc4772d487045805a55ddbad2ecf21c6d60938dc7fcd2"},
{file = "importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b"},
{file = "importlib_metadata-8.5.0.tar.gz", hash = "sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7"},
]
[package.dependencies]
zipp = ">=0.5"
zipp = ">=3.20"
[package.extras]
docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""]
cover = ["pytest-cov"]
doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
enabler = ["pytest-enabler (>=2.2)"]
perf = ["ipython"]
testing = ["flufl.flake8", "importlib-resources (>=1.3) ; python_version < \"3.9\"", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy ; platform_python_implementation != \"PyPy\"", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"]
test = ["flufl.flake8", "importlib-resources (>=1.3) ; python_version < \"3.9\"", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"]
type = ["pytest-mypy"]
[[package]]
name = "importlib-resources"
@ -2322,142 +2326,6 @@ datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
realtime = ["websockets (>=13,<16)"]
voice-helpers = ["numpy (>=2.0.2)", "sounddevice (>=0.5.1)"]
[[package]]
name = "opentelemetry-api"
version = "1.25.0"
description = "OpenTelemetry Python API"
optional = false
python-versions = ">=3.8"
groups = ["dev", "proxy-dev"]
files = [
{file = "opentelemetry_api-1.25.0-py3-none-any.whl", hash = "sha256:757fa1aa020a0f8fa139f8959e53dec2051cc26b832e76fa839a6d76ecefd737"},
{file = "opentelemetry_api-1.25.0.tar.gz", hash = "sha256:77c4985f62f2614e42ce77ee4c9da5fa5f0bc1e1821085e9a47533a9323ae869"},
]
[package.dependencies]
deprecated = ">=1.2.6"
importlib-metadata = ">=6.0,<=7.1"
[[package]]
name = "opentelemetry-exporter-otlp"
version = "1.25.0"
description = "OpenTelemetry Collector Exporters"
optional = false
python-versions = ">=3.8"
groups = ["dev", "proxy-dev"]
files = [
{file = "opentelemetry_exporter_otlp-1.25.0-py3-none-any.whl", hash = "sha256:d67a831757014a3bc3174e4cd629ae1493b7ba8d189e8a007003cacb9f1a6b60"},
{file = "opentelemetry_exporter_otlp-1.25.0.tar.gz", hash = "sha256:ce03199c1680a845f82e12c0a6a8f61036048c07ec7a0bd943142aca8fa6ced0"},
]
[package.dependencies]
opentelemetry-exporter-otlp-proto-grpc = "1.25.0"
opentelemetry-exporter-otlp-proto-http = "1.25.0"
[[package]]
name = "opentelemetry-exporter-otlp-proto-common"
version = "1.25.0"
description = "OpenTelemetry Protobuf encoding"
optional = false
python-versions = ">=3.8"
groups = ["dev", "proxy-dev"]
files = [
{file = "opentelemetry_exporter_otlp_proto_common-1.25.0-py3-none-any.whl", hash = "sha256:15637b7d580c2675f70246563363775b4e6de947871e01d0f4e3881d1848d693"},
{file = "opentelemetry_exporter_otlp_proto_common-1.25.0.tar.gz", hash = "sha256:c93f4e30da4eee02bacd1e004eb82ce4da143a2f8e15b987a9f603e0a85407d3"},
]
[package.dependencies]
opentelemetry-proto = "1.25.0"
[[package]]
name = "opentelemetry-exporter-otlp-proto-grpc"
version = "1.25.0"
description = "OpenTelemetry Collector Protobuf over gRPC Exporter"
optional = false
python-versions = ">=3.8"
groups = ["dev", "proxy-dev"]
files = [
{file = "opentelemetry_exporter_otlp_proto_grpc-1.25.0-py3-none-any.whl", hash = "sha256:3131028f0c0a155a64c430ca600fd658e8e37043cb13209f0109db5c1a3e4eb4"},
{file = "opentelemetry_exporter_otlp_proto_grpc-1.25.0.tar.gz", hash = "sha256:c0b1661415acec5af87625587efa1ccab68b873745ca0ee96b69bb1042087eac"},
]
[package.dependencies]
deprecated = ">=1.2.6"
googleapis-common-protos = ">=1.52,<2.0"
grpcio = ">=1.0.0,<2.0.0"
opentelemetry-api = ">=1.15,<2.0"
opentelemetry-exporter-otlp-proto-common = "1.25.0"
opentelemetry-proto = "1.25.0"
opentelemetry-sdk = ">=1.25.0,<1.26.0"
[[package]]
name = "opentelemetry-exporter-otlp-proto-http"
version = "1.25.0"
description = "OpenTelemetry Collector Protobuf over HTTP Exporter"
optional = false
python-versions = ">=3.8"
groups = ["dev", "proxy-dev"]
files = [
{file = "opentelemetry_exporter_otlp_proto_http-1.25.0-py3-none-any.whl", hash = "sha256:2eca686ee11b27acd28198b3ea5e5863a53d1266b91cda47c839d95d5e0541a6"},
{file = "opentelemetry_exporter_otlp_proto_http-1.25.0.tar.gz", hash = "sha256:9f8723859e37c75183ea7afa73a3542f01d0fd274a5b97487ea24cb683d7d684"},
]
[package.dependencies]
deprecated = ">=1.2.6"
googleapis-common-protos = ">=1.52,<2.0"
opentelemetry-api = ">=1.15,<2.0"
opentelemetry-exporter-otlp-proto-common = "1.25.0"
opentelemetry-proto = "1.25.0"
opentelemetry-sdk = ">=1.25.0,<1.26.0"
requests = ">=2.7,<3.0"
[[package]]
name = "opentelemetry-proto"
version = "1.25.0"
description = "OpenTelemetry Python Proto"
optional = false
python-versions = ">=3.8"
groups = ["dev", "proxy-dev"]
files = [
{file = "opentelemetry_proto-1.25.0-py3-none-any.whl", hash = "sha256:f07e3341c78d835d9b86665903b199893befa5e98866f63d22b00d0b7ca4972f"},
{file = "opentelemetry_proto-1.25.0.tar.gz", hash = "sha256:35b6ef9dc4a9f7853ecc5006738ad40443701e52c26099e197895cbda8b815a3"},
]
[package.dependencies]
protobuf = ">=3.19,<5.0"
[[package]]
name = "opentelemetry-sdk"
version = "1.25.0"
description = "OpenTelemetry Python SDK"
optional = false
python-versions = ">=3.8"
groups = ["dev", "proxy-dev"]
files = [
{file = "opentelemetry_sdk-1.25.0-py3-none-any.whl", hash = "sha256:d97ff7ec4b351692e9d5a15af570c693b8715ad78b8aafbec5c7100fe966b4c9"},
{file = "opentelemetry_sdk-1.25.0.tar.gz", hash = "sha256:ce7fc319c57707ef5bf8b74fb9f8ebdb8bfafbe11898410e0d2a761d08a98ec7"},
]
[package.dependencies]
opentelemetry-api = "1.25.0"
opentelemetry-semantic-conventions = "0.46b0"
typing-extensions = ">=3.7.4"
[[package]]
name = "opentelemetry-semantic-conventions"
version = "0.46b0"
description = "OpenTelemetry Semantic Conventions"
optional = false
python-versions = ">=3.8"
groups = ["dev", "proxy-dev"]
files = [
{file = "opentelemetry_semantic_conventions-0.46b0-py3-none-any.whl", hash = "sha256:6daef4ef9fa51d51855d9f8e0ccd3a1bd59e0e545abe99ac6203804e36ab3e07"},
{file = "opentelemetry_semantic_conventions-0.46b0.tar.gz", hash = "sha256:fbc982ecbb6a6e90869b15c1673be90bd18c8a56ff1cffc0864e38e2edffaefa"},
]
[package.dependencies]
opentelemetry-api = "1.25.0"
[[package]]
name = "orjson"
version = "3.10.15"
@ -2800,25 +2668,25 @@ testing = ["google-api-core (>=1.31.5)"]
[[package]]
name = "protobuf"
version = "4.25.6"
version = "5.29.4"
description = ""
optional = false
optional = true
python-versions = ">=3.8"
groups = ["main", "dev", "proxy-dev"]
groups = ["main"]
markers = "extra == \"extra-proxy\""
files = [
{file = "protobuf-4.25.6-cp310-abi3-win32.whl", hash = "sha256:61df6b5786e2b49fc0055f636c1e8f0aff263808bb724b95b164685ac1bcc13a"},
{file = "protobuf-4.25.6-cp310-abi3-win_amd64.whl", hash = "sha256:b8f837bfb77513fe0e2f263250f423217a173b6d85135be4d81e96a4653bcd3c"},
{file = "protobuf-4.25.6-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:6d4381f2417606d7e01750e2729fe6fbcda3f9883aa0c32b51d23012bded6c91"},
{file = "protobuf-4.25.6-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:5dd800da412ba7f6f26d2c08868a5023ce624e1fdb28bccca2dc957191e81fb5"},
{file = "protobuf-4.25.6-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:4434ff8bb5576f9e0c78f47c41cdf3a152c0b44de475784cd3fd170aef16205a"},
{file = "protobuf-4.25.6-cp38-cp38-win32.whl", hash = "sha256:8bad0f9e8f83c1fbfcc34e573352b17dfce7d0519512df8519994168dc015d7d"},
{file = "protobuf-4.25.6-cp38-cp38-win_amd64.whl", hash = "sha256:b6905b68cde3b8243a198268bb46fbec42b3455c88b6b02fb2529d2c306d18fc"},
{file = "protobuf-4.25.6-cp39-cp39-win32.whl", hash = "sha256:3f3b0b39db04b509859361ac9bca65a265fe9342e6b9406eda58029f5b1d10b2"},
{file = "protobuf-4.25.6-cp39-cp39-win_amd64.whl", hash = "sha256:6ef2045f89d4ad8d95fd43cd84621487832a61d15b49500e4c1350e8a0ef96be"},
{file = "protobuf-4.25.6-py3-none-any.whl", hash = "sha256:07972021c8e30b870cfc0863409d033af940213e0e7f64e27fe017b929d2c9f7"},
{file = "protobuf-4.25.6.tar.gz", hash = "sha256:f8cfbae7c5afd0d0eaccbe73267339bff605a2315860bb1ba08eb66670a9a91f"},
{file = "protobuf-5.29.4-cp310-abi3-win32.whl", hash = "sha256:13eb236f8eb9ec34e63fc8b1d6efd2777d062fa6aaa68268fb67cf77f6839ad7"},
{file = "protobuf-5.29.4-cp310-abi3-win_amd64.whl", hash = "sha256:bcefcdf3976233f8a502d265eb65ea740c989bacc6c30a58290ed0e519eb4b8d"},
{file = "protobuf-5.29.4-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:307ecba1d852ec237e9ba668e087326a67564ef83e45a0189a772ede9e854dd0"},
{file = "protobuf-5.29.4-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:aec4962f9ea93c431d5714ed1be1c93f13e1a8618e70035ba2b0564d9e633f2e"},
{file = "protobuf-5.29.4-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:d7d3f7d1d5a66ed4942d4fefb12ac4b14a29028b209d4bfb25c68ae172059922"},
{file = "protobuf-5.29.4-cp38-cp38-win32.whl", hash = "sha256:1832f0515b62d12d8e6ffc078d7e9eb06969aa6dc13c13e1036e39d73bebc2de"},
{file = "protobuf-5.29.4-cp38-cp38-win_amd64.whl", hash = "sha256:476cb7b14914c780605a8cf62e38c2a85f8caff2e28a6a0bad827ec7d6c85d68"},
{file = "protobuf-5.29.4-cp39-cp39-win32.whl", hash = "sha256:fd32223020cb25a2cc100366f1dedc904e2d71d9322403224cdde5fdced0dabe"},
{file = "protobuf-5.29.4-cp39-cp39-win_amd64.whl", hash = "sha256:678974e1e3a9b975b8bc2447fca458db5f93a2fb6b0c8db46b6675b5b5346812"},
{file = "protobuf-5.29.4-py3-none-any.whl", hash = "sha256:3fde11b505e1597f71b875ef2fc52062b6a9740e5f7c8997ce878b6009145862"},
{file = "protobuf-5.29.4.tar.gz", hash = "sha256:4f1dfcd7997b31ef8f53ec82781ff434a28bf71d9102ddde14d076adcfc78c99"},
]
markers = {main = "extra == \"extra-proxy\""}
[[package]]
name = "pyasn1"
@@ -3474,7 +3342,7 @@ version = "2.31.0"
description = "Python HTTP for Humans."
optional = false
python-versions = ">=3.7"
groups = ["main", "dev", "proxy-dev"]
groups = ["main"]
files = [
{file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"},
{file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"},
@@ -4159,7 +4027,7 @@ version = "1.26.20"
description = "HTTP library with thread-safe connection pooling, file post, and more."
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
groups = ["main", "dev", "proxy-dev"]
groups = ["main"]
markers = "python_version < \"3.10\""
files = [
{file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"},
@@ -4177,7 +4045,7 @@ version = "2.2.3"
description = "HTTP library with thread-safe connection pooling, file post, and more."
optional = false
python-versions = ">=3.8"
groups = ["main", "dev", "proxy-dev"]
groups = ["main", "dev"]
markers = "python_version >= \"3.10\""
files = [
{file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"},
@@ -4361,95 +4229,6 @@ files = [
{file = "websockets-13.1.tar.gz", hash = "sha256:a3b3366087c1bc0a2795111edcadddb8b3b59509d5db5d7ea3fdd69f954a8878"},
]
[[package]]
name = "wrapt"
version = "1.17.2"
description = "Module for decorators, wrappers and monkey patching."
optional = false
python-versions = ">=3.8"
groups = ["dev", "proxy-dev"]
files = [
{file = "wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984"},
{file = "wrapt-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22"},
{file = "wrapt-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80dd7db6a7cb57ffbc279c4394246414ec99537ae81ffd702443335a61dbf3a7"},
{file = "wrapt-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a6e821770cf99cc586d33833b2ff32faebdbe886bd6322395606cf55153246c"},
{file = "wrapt-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b60fb58b90c6d63779cb0c0c54eeb38941bae3ecf7a73c764c52c88c2dcb9d72"},
{file = "wrapt-1.17.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b870b5df5b71d8c3359d21be8f0d6c485fa0ebdb6477dda51a1ea54a9b558061"},
{file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4011d137b9955791f9084749cba9a367c68d50ab8d11d64c50ba1688c9b457f2"},
{file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1473400e5b2733e58b396a04eb7f35f541e1fb976d0c0724d0223dd607e0f74c"},
{file = "wrapt-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3cedbfa9c940fdad3e6e941db7138e26ce8aad38ab5fe9dcfadfed9db7a54e62"},
{file = "wrapt-1.17.2-cp310-cp310-win32.whl", hash = "sha256:582530701bff1dec6779efa00c516496968edd851fba224fbd86e46cc6b73563"},
{file = "wrapt-1.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:58705da316756681ad3c9c73fd15499aa4d8c69f9fd38dc8a35e06c12468582f"},
{file = "wrapt-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ff04ef6eec3eee8a5efef2401495967a916feaa353643defcc03fc74fe213b58"},
{file = "wrapt-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db983e7bca53819efdbd64590ee96c9213894272c776966ca6306b73e4affda"},
{file = "wrapt-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9abc77a4ce4c6f2a3168ff34b1da9b0f311a8f1cfd694ec96b0603dff1c79438"},
{file = "wrapt-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b929ac182f5ace000d459c59c2c9c33047e20e935f8e39371fa6e3b85d56f4a"},
{file = "wrapt-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f09b286faeff3c750a879d336fb6d8713206fc97af3adc14def0cdd349df6000"},
{file = "wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7ed2d9d039bd41e889f6fb9364554052ca21ce823580f6a07c4ec245c1f5d6"},
{file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:129a150f5c445165ff941fc02ee27df65940fcb8a22a61828b1853c98763a64b"},
{file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1fb5699e4464afe5c7e65fa51d4f99e0b2eadcc176e4aa33600a3df7801d6662"},
{file = "wrapt-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9a2bce789a5ea90e51a02dfcc39e31b7f1e662bc3317979aa7e5538e3a034f72"},
{file = "wrapt-1.17.2-cp311-cp311-win32.whl", hash = "sha256:4afd5814270fdf6380616b321fd31435a462019d834f83c8611a0ce7484c7317"},
{file = "wrapt-1.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:acc130bc0375999da18e3d19e5a86403667ac0c4042a094fefb7eec8ebac7cf3"},
{file = "wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925"},
{file = "wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392"},
{file = "wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40"},
{file = "wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb1d0dbf99411f3d871deb6faa9aabb9d4e744d67dcaaa05399af89d847a91d"},
{file = "wrapt-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d18a4865f46b8579d44e4fe1e2bcbc6472ad83d98e22a26c963d46e4c125ef0b"},
{file = "wrapt-1.17.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc570b5f14a79734437cb7b0500376b6b791153314986074486e0b0fa8d71d98"},
{file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6d9187b01bebc3875bac9b087948a2bccefe464a7d8f627cf6e48b1bbae30f82"},
{file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9e8659775f1adf02eb1e6f109751268e493c73716ca5761f8acb695e52a756ae"},
{file = "wrapt-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8b2816ebef96d83657b56306152a93909a83f23994f4b30ad4573b00bd11bb9"},
{file = "wrapt-1.17.2-cp312-cp312-win32.whl", hash = "sha256:468090021f391fe0056ad3e807e3d9034e0fd01adcd3bdfba977b6fdf4213ea9"},
{file = "wrapt-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:ec89ed91f2fa8e3f52ae53cd3cf640d6feff92ba90d62236a81e4e563ac0e991"},
{file = "wrapt-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125"},
{file = "wrapt-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998"},
{file = "wrapt-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5"},
{file = "wrapt-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8"},
{file = "wrapt-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6"},
{file = "wrapt-1.17.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc"},
{file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2"},
{file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b"},
{file = "wrapt-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504"},
{file = "wrapt-1.17.2-cp313-cp313-win32.whl", hash = "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a"},
{file = "wrapt-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845"},
{file = "wrapt-1.17.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192"},
{file = "wrapt-1.17.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b"},
{file = "wrapt-1.17.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0"},
{file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306"},
{file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb"},
{file = "wrapt-1.17.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681"},
{file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6"},
{file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6"},
{file = "wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f"},
{file = "wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555"},
{file = "wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c"},
{file = "wrapt-1.17.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5c803c401ea1c1c18de70a06a6f79fcc9c5acfc79133e9869e730ad7f8ad8ef9"},
{file = "wrapt-1.17.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f917c1180fdb8623c2b75a99192f4025e412597c50b2ac870f156de8fb101119"},
{file = "wrapt-1.17.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ecc840861360ba9d176d413a5489b9a0aff6d6303d7e733e2c4623cfa26904a6"},
{file = "wrapt-1.17.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb87745b2e6dc56361bfde481d5a378dc314b252a98d7dd19a651a3fa58f24a9"},
{file = "wrapt-1.17.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58455b79ec2661c3600e65c0a716955adc2410f7383755d537584b0de41b1d8a"},
{file = "wrapt-1.17.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4e42a40a5e164cbfdb7b386c966a588b1047558a990981ace551ed7e12ca9c2"},
{file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:91bd7d1773e64019f9288b7a5101f3ae50d3d8e6b1de7edee9c2ccc1d32f0c0a"},
{file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:bb90fb8bda722a1b9d48ac1e6c38f923ea757b3baf8ebd0c82e09c5c1a0e7a04"},
{file = "wrapt-1.17.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:08e7ce672e35efa54c5024936e559469436f8b8096253404faeb54d2a878416f"},
{file = "wrapt-1.17.2-cp38-cp38-win32.whl", hash = "sha256:410a92fefd2e0e10d26210e1dfb4a876ddaf8439ef60d6434f21ef8d87efc5b7"},
{file = "wrapt-1.17.2-cp38-cp38-win_amd64.whl", hash = "sha256:95c658736ec15602da0ed73f312d410117723914a5c91a14ee4cdd72f1d790b3"},
{file = "wrapt-1.17.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:99039fa9e6306880572915728d7f6c24a86ec57b0a83f6b2491e1d8ab0235b9a"},
{file = "wrapt-1.17.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2696993ee1eebd20b8e4ee4356483c4cb696066ddc24bd70bcbb80fa56ff9061"},
{file = "wrapt-1.17.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:612dff5db80beef9e649c6d803a8d50c409082f1fedc9dbcdfde2983b2025b82"},
{file = "wrapt-1.17.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62c2caa1585c82b3f7a7ab56afef7b3602021d6da34fbc1cf234ff139fed3cd9"},
{file = "wrapt-1.17.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c958bcfd59bacc2d0249dcfe575e71da54f9dcf4a8bdf89c4cb9a68a1170d73f"},
{file = "wrapt-1.17.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc78a84e2dfbc27afe4b2bd7c80c8db9bca75cc5b85df52bfe634596a1da846b"},
{file = "wrapt-1.17.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ba0f0eb61ef00ea10e00eb53a9129501f52385c44853dbd6c4ad3f403603083f"},
{file = "wrapt-1.17.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1e1fe0e6ab7775fd842bc39e86f6dcfc4507ab0ffe206093e76d61cde37225c8"},
{file = "wrapt-1.17.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c86563182421896d73858e08e1db93afdd2b947a70064b813d515d66549e15f9"},
{file = "wrapt-1.17.2-cp39-cp39-win32.whl", hash = "sha256:f393cda562f79828f38a819f4788641ac7c4085f30f1ce1a68672baa686482bb"},
{file = "wrapt-1.17.2-cp39-cp39-win_amd64.whl", hash = "sha256:36ccae62f64235cf8ddb682073a60519426fdd4725524ae38874adf72b5f2aeb"},
{file = "wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8"},
{file = "wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3"},
]
[[package]]
name = "wsproto"
version = "1.2.0"
@@ -4584,7 +4363,7 @@ version = "3.20.2"
description = "Backport of pathlib-compatible object wrapper for zip files"
optional = false
python-versions = ">=3.8"
groups = ["main", "dev", "proxy-dev"]
groups = ["main"]
files = [
{file = "zipp-3.20.2-py3-none-any.whl", hash = "sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350"},
{file = "zipp-3.20.2.tar.gz", hash = "sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29"},
@@ -4605,4 +4384,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "boto3", "cryptography", "fastapi",
[metadata]
lock-version = "2.1"
python-versions = ">=3.8.1,<4.0, !=3.9.7"
content-hash = "adefc5c35b625ab156ff674c880256643a22880012451d4ade7fa2ef11f5885d"
content-hash = "40074b2e47aae8ece058be9a42eda3ca0618e27e4fc9d6529793816df7adb6c8"
Some files were not shown because too many files have changed in this diff