Merge branch 'main' into litellm_dev_03_10_2025_p3

Krish Dholakia 2025-03-12 14:56:01 -07:00 committed by GitHub
commit 2d957a0ed9
105 changed files with 3874 additions and 437 deletions


@@ -49,7 +49,7 @@ jobs:
pip install opentelemetry-api==1.25.0
pip install opentelemetry-sdk==1.25.0
pip install opentelemetry-exporter-otlp==1.25.0
pip install openai==1.54.0
pip install openai==1.66.1
pip install prisma==0.11.0
pip install "detect_secrets==1.5.0"
pip install "httpx==0.24.1"
@@ -168,7 +168,7 @@ jobs:
pip install opentelemetry-api==1.25.0
pip install opentelemetry-sdk==1.25.0
pip install opentelemetry-exporter-otlp==1.25.0
pip install openai==1.54.0
pip install openai==1.66.1
pip install prisma==0.11.0
pip install "detect_secrets==1.5.0"
pip install "httpx==0.24.1"
@@ -267,7 +267,7 @@ jobs:
pip install opentelemetry-api==1.25.0
pip install opentelemetry-sdk==1.25.0
pip install opentelemetry-exporter-otlp==1.25.0
pip install openai==1.54.0
pip install openai==1.66.1
pip install prisma==0.11.0
pip install "detect_secrets==1.5.0"
pip install "httpx==0.24.1"
@@ -511,7 +511,7 @@ jobs:
pip install opentelemetry-api==1.25.0
pip install opentelemetry-sdk==1.25.0
pip install opentelemetry-exporter-otlp==1.25.0
pip install openai==1.54.0
pip install openai==1.66.1
pip install prisma==0.11.0
pip install "detect_secrets==1.5.0"
pip install "httpx==0.24.1"
@@ -678,6 +678,48 @@ jobs:
paths:
- llm_translation_coverage.xml
- llm_translation_coverage
llm_responses_api_testing:
docker:
- image: cimg/python:3.11
auth:
username: ${DOCKERHUB_USERNAME}
password: ${DOCKERHUB_PASSWORD}
working_directory: ~/project
steps:
- checkout
- run:
name: Install Dependencies
command: |
python -m pip install --upgrade pip
python -m pip install -r requirements.txt
pip install "pytest==7.3.1"
pip install "pytest-retry==1.6.3"
pip install "pytest-cov==5.0.0"
pip install "pytest-asyncio==0.21.1"
pip install "respx==0.21.1"
# Run pytest and generate JUnit XML report
- run:
name: Run tests
command: |
pwd
ls
python -m pytest -vv tests/llm_responses_api_testing --cov=litellm --cov-report=xml -x -s -v --junitxml=test-results/junit.xml --durations=5
no_output_timeout: 120m
- run:
name: Rename the coverage files
command: |
mv coverage.xml llm_responses_api_coverage.xml
mv .coverage llm_responses_api_coverage
# Store test results
- store_test_results:
path: test-results
- persist_to_workspace:
root: .
paths:
- llm_responses_api_coverage.xml
- llm_responses_api_coverage
litellm_mapped_tests:
docker:
- image: cimg/python:3.11
@@ -1234,7 +1276,7 @@ jobs:
pip install "aiodynamo==23.10.1"
pip install "asyncio==3.4.3"
pip install "PyGithub==1.59.1"
pip install "openai==1.54.0 "
pip install "openai==1.66.1"
- run:
name: Install Grype
command: |
@@ -1309,7 +1351,7 @@ jobs:
command: |
pwd
ls
python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests
python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/llm_responses_api_testing --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests
no_output_timeout: 120m
# Store test results
@@ -1370,7 +1412,7 @@ jobs:
pip install "aiodynamo==23.10.1"
pip install "asyncio==3.4.3"
pip install "PyGithub==1.59.1"
pip install "openai==1.54.0 "
pip install "openai==1.66.1"
# Run pytest and generate JUnit XML report
- run:
name: Build Docker image
@@ -1492,7 +1534,7 @@ jobs:
pip install "aiodynamo==23.10.1"
pip install "asyncio==3.4.3"
pip install "PyGithub==1.59.1"
pip install "openai==1.54.0 "
pip install "openai==1.66.1"
- run:
name: Build Docker image
command: docker build -t my-app:latest -f ./docker/Dockerfile.database .
@@ -1921,7 +1963,7 @@ jobs:
pip install "pytest-asyncio==0.21.1"
pip install "google-cloud-aiplatform==1.43.0"
pip install aiohttp
pip install "openai==1.54.0 "
pip install "openai==1.66.1"
pip install "assemblyai==0.37.0"
python -m pip install --upgrade pip
pip install "pydantic==2.7.1"
@@ -2068,7 +2110,7 @@ jobs:
python -m venv venv
. venv/bin/activate
pip install coverage
coverage combine llm_translation_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_coverage image_gen_coverage pass_through_unit_tests_coverage batches_coverage litellm_proxy_security_tests_coverage
coverage combine llm_translation_coverage llm_responses_api_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_coverage image_gen_coverage pass_through_unit_tests_coverage batches_coverage litellm_proxy_security_tests_coverage
coverage xml
- codecov/upload:
file: ./coverage.xml
@@ -2197,7 +2239,7 @@ jobs:
pip install "pytest-retry==1.6.3"
pip install "pytest-asyncio==0.21.1"
pip install aiohttp
pip install "openai==1.54.0 "
pip install "openai==1.66.1"
python -m pip install --upgrade pip
pip install "pydantic==2.7.1"
pip install "pytest==7.3.1"
@@ -2429,6 +2471,12 @@ workflows:
only:
- main
- /litellm_.*/
- llm_responses_api_testing:
filters:
branches:
only:
- main
- /litellm_.*/
- litellm_mapped_tests:
filters:
branches:
@@ -2468,6 +2516,7 @@ workflows:
- upload-coverage:
requires:
- llm_translation_testing
- llm_responses_api_testing
- litellm_mapped_tests
- batches_testing
- litellm_utils_testing
@@ -2526,6 +2575,7 @@ workflows:
- load_testing
- test_bad_database_url
- llm_translation_testing
- llm_responses_api_testing
- litellm_mapped_tests
- batches_testing
- litellm_utils_testing


@@ -1,5 +1,5 @@
# used by CI/CD testing
openai==1.54.0
openai==1.66.1
python-dotenv
tiktoken
importlib_metadata


@@ -10,9 +10,9 @@
**Please complete all items before asking a LiteLLM maintainer to review your PR**
- [ ] I have Added testing in the `tests/litellm/` directory, **Adding at least 1 test is a hard requirement** - [see details](https://docs.litellm.ai/docs/contributing#2-adding-testing-to-your-pr)
- [ ] I have Added testing in the `tests/litellm/` directory, **Adding at least 1 test is a hard requirement** - [see details](https://docs.litellm.ai/docs/extras/contributing_code)
- [ ] I have added a screenshot of my new test passing locally
- [ ] My PR passes all unit tests on `make unit-test` [https://docs.litellm.ai/docs/contributing]
- [ ] My PR passes all unit tests on (`make test-unit`)[https://docs.litellm.ai/docs/extras/contributing_code]
- [ ] My PR's scope is as isolated as possible, it only solves 1 specific problem

.github/workflows/helm_unit_test.yml (new file, 27 lines)

@@ -0,0 +1,27 @@
name: Helm unit test
on:
pull_request:
push:
branches:
- main
jobs:
unit-test:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Set up Helm 3.11.1
uses: azure/setup-helm@v1
with:
version: '3.11.1'
- name: Install Helm Unit Test Plugin
run: |
helm plugin install https://github.com/helm-unittest/helm-unittest --version v0.4.4
- name: Run unit tests
run:
helm unittest -f 'tests/*.yaml' deploy/charts/litellm-helm


@@ -340,7 +340,7 @@ curl 'http://0.0.0.0:4000/key/generate' \
## Contributing
Interested in contributing? Contributions to LiteLLM Python SDK, Proxy Server, and contributing LLM integrations are both accepted and highly encouraged! [See our Contribution Guide for more details](https://docs.litellm.ai/docs/contributing)
Interested in contributing? Contributions to LiteLLM Python SDK, Proxy Server, and contributing LLM integrations are both accepted and highly encouraged! [See our Contribution Guide for more details](https://docs.litellm.ai/docs/extras/contributing_code)
# Enterprise
For companies that need better security, user management and professional support


@@ -0,0 +1,54 @@
suite: test deployment
templates:
- deployment.yaml
- configmap-litellm.yaml
tests:
- it: should work
template: deployment.yaml
set:
image.tag: test
asserts:
- isKind:
of: Deployment
- matchRegex:
path: metadata.name
pattern: -litellm$
- equal:
path: spec.template.spec.containers[0].image
value: ghcr.io/berriai/litellm-database:test
- it: should work with tolerations
template: deployment.yaml
set:
tolerations:
- key: node-role.kubernetes.io/master
operator: Exists
effect: NoSchedule
asserts:
- equal:
path: spec.template.spec.tolerations[0].key
value: node-role.kubernetes.io/master
- equal:
path: spec.template.spec.tolerations[0].operator
value: Exists
- it: should work with affinity
template: deployment.yaml
set:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: topology.kubernetes.io/zone
operator: In
values:
- antarctica-east1
asserts:
- equal:
path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].key
value: topology.kubernetes.io/zone
- equal:
path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].operator
value: In
- equal:
path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[0]
value: antarctica-east1


@@ -48,7 +48,7 @@ The `tests/litellm/` directory follows the same directory structure as `litellm/`
- `litellm/proxy/test_caching_routes.py` maps to `litellm/proxy/caching_routes.py`
- `test_{filename}.py` maps to `litellm/{filename}.py`
### 3. Running Unit Tests
## 3. Running Unit Tests
run the following command on the root of the litellm directory
@@ -56,7 +56,7 @@ run the following command on the root of the litellm directory
make test-unit
```
### 4. Submit a PR with your changes!
## 4. Submit a PR with your changes!
- push your fork to your GitHub repo
- submit a PR from there


@@ -63,9 +63,9 @@ model_list:
- model_name: bedrock-claude-v1
litellm_params:
model: bedrock/anthropic.claude-instant-v1
aws_access_key_id: os.environ/CUSTOM_AWS_ACCESS_KEY_ID
aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
aws_secret_access_key: os.environ/CUSTOM_AWS_SECRET_ACCESS_KEY
aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
aws_region_name: os.environ/CUSTOM_AWS_REGION_NAME
aws_region_name: os.environ/AWS_REGION_NAME
```
All possible auth params:
@@ -1792,10 +1792,14 @@ print(response)
### Advanced - [Pass model/provider-specific Params](https://docs.litellm.ai/docs/completion/provider_specific_params#proxy-usage)
## Image Generation
Use this for stable diffusion on bedrock
Use this for stable diffusion, and amazon nova canvas on bedrock
### Usage
<Tabs>
<TabItem value="sdk" label="SDK">
```python
import os
from litellm import image_generation
@@ -1830,6 +1834,41 @@ response = image_generation(
)
print(f"response: {response}")
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Setup config.yaml
```yaml
model_list:
- model_name: amazon.nova-canvas-v1:0
litellm_params:
model: bedrock/amazon.nova-canvas-v1:0
aws_region_name: "us-east-1"
aws_secret_access_key: my-key # OPTIONAL - all boto3 auth params supported
aws_secret_access_id: my-id # OPTIONAL - all boto3 auth params supported
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml
```
3. Test it!
```bash
curl -L -X POST 'http://0.0.0.0:4000/v1/images/generations' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer $LITELLM_VIRTUAL_KEY' \
-d '{
"model": "amazon.nova-canvas-v1:0",
"prompt": "A cute baby sea otter"
}'
```
</TabItem>
</Tabs>
## Supported AWS Bedrock Image Generation Models
@@ -1910,6 +1949,8 @@ curl http://0.0.0.0:4000/rerank \
"Capital punishment has existed in the United States since before it was a country."
],
"top_n": 3
}'
```


@@ -903,6 +903,7 @@ from .llms.bedrock.chat.invoke_transformations.base_invoke_transformation import
from .llms.bedrock.image.amazon_stability1_transformation import AmazonStabilityConfig
from .llms.bedrock.image.amazon_stability3_transformation import AmazonStability3Config
from .llms.bedrock.image.amazon_nova_canvas_transformation import AmazonNovaCanvasConfig
from .llms.bedrock.embed.amazon_titan_g1_transformation import AmazonTitanG1Config
from .llms.bedrock.embed.amazon_titan_multimodal_transformation import (
AmazonTitanMultimodalEmbeddingG1Config,
@@ -925,6 +926,7 @@ from .llms.groq.chat.transformation import GroqChatConfig
from .llms.voyage.embedding.transformation import VoyageEmbeddingConfig
from .llms.azure_ai.chat.transformation import AzureAIStudioConfig
from .llms.mistral.mistral_chat_transformation import MistralConfig
from .llms.openai.responses.transformation import OpenAIResponsesAPIConfig
from .llms.openai.chat.o_series_transformation import (
OpenAIOSeriesConfig as OpenAIO1Config, # maintain backwards compatibility
OpenAIOSeriesConfig,
@@ -1014,6 +1016,7 @@ from .batches.main import *
from .batch_completion.main import * # type: ignore
from .rerank_api.main import *
from .llms.anthropic.experimental_pass_through.messages.handler import *
from .responses.main import *
from .realtime_api.main import _arealtime
from .fine_tuning.main import *
from .files.main import *


@@ -18,6 +18,7 @@ SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000 # Minimum number of requests
REPEATED_STREAMING_CHUNK_LIMIT = 100 # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
#### Networking settings ####
request_timeout: float = 6000 # time in seconds
STREAM_SSE_DONE_STRING: str = "[DONE]"
LITELLM_CHAT_PROVIDERS = [
"openai",

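The new `STREAM_SSE_DONE_STRING` constant gives stream handling code a single sentinel for the end of an SSE stream. A minimal sketch of how a consumer might use such a sentinel (the parsing loop below is illustrative, not LiteLLM's actual iterator):

```python
import json
from typing import Iterator

STREAM_SSE_DONE_STRING = "[DONE]"

def iter_sse_payloads(raw_lines: Iterator[str]) -> Iterator[dict]:
    """Yield parsed JSON payloads from 'data: ...' lines until the [DONE] sentinel."""
    for line in raw_lines:
        if not line.startswith("data:"):
            continue  # skip comments, event names, and keep-alives
        payload = line[len("data:"):].strip()
        if payload == STREAM_SSE_DONE_STRING:
            break  # provider signalled end of stream
        yield json.loads(payload)
```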

@@ -44,7 +44,12 @@ from litellm.llms.vertex_ai.cost_calculator import cost_router as google_cost_ro
from litellm.llms.vertex_ai.image_generation.cost_calculator import (
cost_calculator as vertex_ai_image_cost_calculator,
)
from litellm.types.llms.openai import HttpxBinaryResponseContent
from litellm.responses.utils import ResponseAPILoggingUtils
from litellm.types.llms.openai import (
HttpxBinaryResponseContent,
ResponseAPIUsage,
ResponsesAPIResponse,
)
from litellm.types.rerank import RerankBilledUnits, RerankResponse
from litellm.types.utils import (
CallTypesLiteral,
@@ -464,6 +469,13 @@ def _get_usage_object(
return usage_obj
def _is_known_usage_objects(usage_obj):
"""Returns True if the usage obj is a known Usage type"""
return isinstance(usage_obj, litellm.Usage) or isinstance(
usage_obj, ResponseAPIUsage
)
def _infer_call_type(
call_type: Optional[CallTypesLiteral], completion_response: Any
) -> Optional[CallTypesLiteral]:
@@ -573,9 +585,7 @@ def completion_cost( # noqa: PLR0915
base_model=base_model,
)
verbose_logger.debug(
f"completion_response _select_model_name_for_cost_calc: {model}"
)
verbose_logger.info(f"selected model name for cost calculation: {model}")
if completion_response is not None and (
isinstance(completion_response, BaseModel)
@@ -587,8 +597,8 @@ def completion_cost( # noqa: PLR0915
)
else:
usage_obj = getattr(completion_response, "usage", {})
if isinstance(usage_obj, BaseModel) and not isinstance(
usage_obj, litellm.Usage
if isinstance(usage_obj, BaseModel) and not _is_known_usage_objects(
usage_obj=usage_obj
):
setattr(
completion_response,
@@ -601,6 +611,14 @@ def completion_cost( # noqa: PLR0915
_usage = usage_obj.model_dump()
else:
_usage = usage_obj
if ResponseAPILoggingUtils._is_response_api_usage(_usage):
_usage = (
ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
_usage
).model_dump()
)
# get input/output tokens from completion_response
prompt_tokens = _usage.get("prompt_tokens", 0)
completion_tokens = _usage.get("completion_tokens", 0)
@@ -799,6 +817,7 @@ def response_cost_calculator(
TextCompletionResponse,
HttpxBinaryResponseContent,
RerankResponse,
ResponsesAPIResponse,
],
model: str,
custom_llm_provider: Optional[str],

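The `_is_known_usage_objects` check and the `ResponseAPILoggingUtils` hook above exist so the cost path can accept Responses API usage objects, which count tokens as `input_tokens`/`output_tokens` rather than the chat-style `prompt_tokens`/`completion_tokens`. A rough sketch of the remapping this implies (the Responses-side field names are an assumption based on the OpenAI Responses API, not a copy of LiteLLM's helper):

```python
def response_api_usage_to_chat_usage(usage: dict) -> dict:
    """Map Responses-API-style usage keys onto chat-completion-style keys."""
    prompt_tokens = usage.get("input_tokens", 0)
    completion_tokens = usage.get("output_tokens", 0)
    return {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": usage.get("total_tokens", prompt_tokens + completion_tokens),
    }

# Example: {"input_tokens": 12, "output_tokens": 34, "total_tokens": 46}
# becomes  {"prompt_tokens": 12, "completion_tokens": 34, "total_tokens": 46},
# which the existing prompt/completion token cost lookup already understands.
```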

@@ -118,6 +118,7 @@ class BadRequestError(openai.BadRequestError): # type: ignore
litellm_debug_info: Optional[str] = None,
max_retries: Optional[int] = None,
num_retries: Optional[int] = None,
body: Optional[dict] = None,
):
self.status_code = 400
self.message = "litellm.BadRequestError: {}".format(message)
@@ -133,7 +134,7 @@ class BadRequestError(openai.BadRequestError): # type: ignore
self.max_retries = max_retries
self.num_retries = num_retries
super().__init__(
self.message, response=response, body=None
self.message, response=response, body=body
) # Call the base class constructor with the parameters it needs
def __str__(self):

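Forwarding `body` into the base `openai.BadRequestError` means callers can inspect the provider's structured error payload instead of only the flattened message string. A hypothetical illustration (the deployment name is made up, and `e.body` is only populated when the underlying SDK attached one):

```python
import litellm

try:
    litellm.completion(
        model="azure/my-deployment",  # hypothetical deployment name
        messages=[{"role": "user", "content": "hi"}],
    )
except litellm.BadRequestError as e:
    print(e.status_code)  # 400
    print(e.body)         # structured provider error, now threaded through instead of None
```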

@@ -331,6 +331,7 @@ def exception_type( # type: ignore # noqa: PLR0915
model=model,
response=getattr(original_exception, "response", None),
litellm_debug_info=extra_information,
body=getattr(original_exception, "body", None),
)
elif (
"Web server is returning an unknown error" in error_str
@@ -421,6 +422,7 @@ def exception_type( # type: ignore # noqa: PLR0915
llm_provider=custom_llm_provider,
response=getattr(original_exception, "response", None),
litellm_debug_info=extra_information,
body=getattr(original_exception, "body", None),
)
elif original_exception.status_code == 429:
exception_mapping_worked = True
@@ -1960,6 +1962,7 @@ def exception_type( # type: ignore # noqa: PLR0915
model=model,
litellm_debug_info=extra_information,
response=getattr(original_exception, "response", None),
body=getattr(original_exception, "body", None),
)
elif (
"The api_key client option must be set either by passing api_key to the client or by setting"
@@ -1991,6 +1994,7 @@ def exception_type( # type: ignore # noqa: PLR0915
model=model,
litellm_debug_info=extra_information,
response=getattr(original_exception, "response", None),
body=getattr(original_exception, "body", None),
)
elif original_exception.status_code == 401:
exception_mapping_worked = True


@@ -39,11 +39,14 @@ from litellm.litellm_core_utils.redact_messages import (
redact_message_input_output_from_custom_logger,
redact_message_input_output_from_logging,
)
from litellm.responses.utils import ResponseAPILoggingUtils
from litellm.types.llms.openai import (
AllMessageValues,
Batch,
FineTuningJob,
HttpxBinaryResponseContent,
ResponseCompletedEvent,
ResponsesAPIResponse,
)
from litellm.types.rerank import RerankResponse
from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
@@ -851,6 +854,8 @@ class Logging(LiteLLMLoggingBaseClass):
RerankResponse,
Batch,
FineTuningJob,
ResponsesAPIResponse,
ResponseCompletedEvent,
],
cache_hit: Optional[bool] = None,
) -> Optional[float]:
@@ -1000,7 +1005,7 @@ class Logging(LiteLLMLoggingBaseClass):
standard_logging_object is None
and result is not None
and self.stream is not True
): # handle streaming separately
):
if (
isinstance(result, ModelResponse)
or isinstance(result, ModelResponseStream)
@@ -1012,6 +1017,7 @@ class Logging(LiteLLMLoggingBaseClass):
or isinstance(result, RerankResponse)
or isinstance(result, FineTuningJob)
or isinstance(result, LiteLLMBatch)
or isinstance(result, ResponsesAPIResponse)
):
## HIDDEN PARAMS ##
hidden_params = getattr(result, "_hidden_params", {})
@@ -1111,7 +1117,7 @@ class Logging(LiteLLMLoggingBaseClass):
## BUILD COMPLETE STREAMED RESPONSE
complete_streaming_response: Optional[
Union[ModelResponse, TextCompletionResponse]
Union[ModelResponse, TextCompletionResponse, ResponsesAPIResponse]
] = None
if "complete_streaming_response" in self.model_call_details:
return # break out of this.
@@ -1633,7 +1639,7 @@ class Logging(LiteLLMLoggingBaseClass):
if "async_complete_streaming_response" in self.model_call_details:
return # break out of this.
complete_streaming_response: Optional[
Union[ModelResponse, TextCompletionResponse]
Union[ModelResponse, TextCompletionResponse, ResponsesAPIResponse]
] = self._get_assembled_streaming_response(
result=result,
start_time=start_time,
@@ -2343,16 +2349,24 @@ class Logging(LiteLLMLoggingBaseClass):
def _get_assembled_streaming_response(
self,
result: Union[ModelResponse, TextCompletionResponse, ModelResponseStream, Any],
result: Union[
ModelResponse,
TextCompletionResponse,
ModelResponseStream,
ResponseCompletedEvent,
Any,
],
start_time: datetime.datetime,
end_time: datetime.datetime,
is_async: bool,
streaming_chunks: List[Any],
) -> Optional[Union[ModelResponse, TextCompletionResponse]]:
) -> Optional[Union[ModelResponse, TextCompletionResponse, ResponsesAPIResponse]]:
if isinstance(result, ModelResponse):
return result
elif isinstance(result, TextCompletionResponse):
return result
elif isinstance(result, ResponseCompletedEvent):
return result.response
elif isinstance(result, ModelResponseStream):
complete_streaming_response: Optional[
Union[ModelResponse, TextCompletionResponse]
@@ -3111,6 +3125,12 @@ class StandardLoggingPayloadSetup:
elif isinstance(usage, Usage):
return usage
elif isinstance(usage, dict):
if ResponseAPILoggingUtils._is_response_api_usage(usage):
return (
ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
usage
)
)
return Usage(**usage)
raise ValueError(f"usage is required, got={usage} of type {type(usage)}")


@@ -1,4 +1,4 @@
from typing import Coroutine, Iterable, Literal, Optional, Union
from typing import Any, Coroutine, Dict, Iterable, Literal, Optional, Union
import httpx
from openai import AsyncAzureOpenAI, AzureOpenAI
@@ -649,7 +649,7 @@ class AzureAssistantsAPI(BaseAzureLLM):
assistant_id: str,
additional_instructions: Optional[str],
instructions: Optional[str],
metadata: Optional[object],
metadata: Optional[Dict],
model: Optional[str],
stream: Optional[bool],
tools: Optional[Iterable[AssistantToolParam]],
@@ -692,13 +692,13 @@ class AzureAssistantsAPI(BaseAzureLLM):
assistant_id: str,
additional_instructions: Optional[str],
instructions: Optional[str],
metadata: Optional[object],
metadata: Optional[Dict],
model: Optional[str],
tools: Optional[Iterable[AssistantToolParam]],
event_handler: Optional[AssistantEventHandler],
litellm_params: Optional[dict] = None,
) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
data = {
data: Dict[str, Any] = {
"thread_id": thread_id,
"assistant_id": assistant_id,
"additional_instructions": additional_instructions,
@@ -718,13 +718,13 @@ class AzureAssistantsAPI(BaseAzureLLM):
assistant_id: str,
additional_instructions: Optional[str],
instructions: Optional[str],
metadata: Optional[object],
metadata: Optional[Dict],
model: Optional[str],
tools: Optional[Iterable[AssistantToolParam]],
event_handler: Optional[AssistantEventHandler],
litellm_params: Optional[dict] = None,
) -> AssistantStreamManager[AssistantEventHandler]:
data = {
data: Dict[str, Any] = {
"thread_id": thread_id,
"assistant_id": assistant_id,
"additional_instructions": additional_instructions,
@@ -746,7 +746,7 @@ class AzureAssistantsAPI(BaseAzureLLM):
assistant_id: str,
additional_instructions: Optional[str],
instructions: Optional[str],
metadata: Optional[object],
metadata: Optional[Dict],
model: Optional[str],
stream: Optional[bool],
tools: Optional[Iterable[AssistantToolParam]],
@@ -768,7 +768,7 @@ class AzureAssistantsAPI(BaseAzureLLM):
assistant_id: str,
additional_instructions: Optional[str],
instructions: Optional[str],
metadata: Optional[object],
metadata: Optional[Dict],
model: Optional[str],
stream: Optional[bool],
tools: Optional[Iterable[AssistantToolParam]],
@@ -791,7 +791,7 @@ class AzureAssistantsAPI(BaseAzureLLM):
assistant_id: str,
additional_instructions: Optional[str],
instructions: Optional[str],
metadata: Optional[object],
metadata: Optional[Dict],
model: Optional[str],
stream: Optional[bool],
tools: Optional[Iterable[AssistantToolParam]],


@@ -7,7 +7,11 @@ from pydantic import BaseModel
import litellm
from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name
from litellm.types.utils import FileTypes
from litellm.utils import TranscriptionResponse, convert_to_model_response_object
from litellm.utils import (
TranscriptionResponse,
convert_to_model_response_object,
extract_duration_from_srt_or_vtt,
)
from .azure import AzureChatCompletion
@@ -140,6 +144,8 @@ class AzureAudioTranscription(AzureChatCompletion):
stringified_response = response.model_dump()
else:
stringified_response = TranscriptionResponse(text=response).model_dump()
duration = extract_duration_from_srt_or_vtt(response)
stringified_response["duration"] = duration
## LOGGING
logging_obj.post_call(

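The transcription path now backfills `duration` by inspecting the returned SRT/VTT text. A sketch of how a helper like `extract_duration_from_srt_or_vtt` could derive it, assuming the end timestamp of the last cue is a reasonable proxy (this is an illustration, not LiteLLM's implementation):

```python
import re
from typing import Optional

# Matches "HH:MM:SS,mmm" (SRT) or "HH:MM:SS.mmm" (VTT) timestamps.
_TIMESTAMP = re.compile(r"(\d{2}):(\d{2}):(\d{2})[.,](\d{3})")

def duration_from_srt_or_vtt(text: str) -> Optional[float]:
    """Return the last timestamp in seconds, i.e. the end of the final cue."""
    matches = _TIMESTAMP.findall(text or "")
    if not matches:
        return None
    hours, minutes, seconds, millis = matches[-1]
    return int(hours) * 3600 + int(minutes) * 60 + int(seconds) + int(millis) / 1000.0

# duration_from_srt_or_vtt("1\n00:00:00,000 --> 00:00:04,500\nhello\n") -> 4.5
```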

@@ -430,10 +430,14 @@ class AzureChatCompletion(BaseAzureLLM, BaseLLM):
status_code = getattr(e, "status_code", 500)
error_headers = getattr(e, "headers", None)
error_response = getattr(e, "response", None)
error_body = getattr(e, "body", None)
if error_headers is None and error_response:
error_headers = getattr(error_response, "headers", None)
raise AzureOpenAIError(
status_code=status_code, message=str(e), headers=error_headers
status_code=status_code,
message=str(e),
headers=error_headers,
body=error_body,
)
async def acompletion(
@@ -519,6 +523,7 @@ class AzureChatCompletion(BaseAzureLLM, BaseLLM):
raise AzureOpenAIError(status_code=500, message=str(e))
except Exception as e:
message = getattr(e, "message", str(e))
body = getattr(e, "body", None)
## LOGGING
logging_obj.post_call(
input=data["messages"],
@@ -529,7 +534,7 @@ class AzureChatCompletion(BaseAzureLLM, BaseLLM):
if hasattr(e, "status_code"):
raise e
else:
raise AzureOpenAIError(status_code=500, message=message)
raise AzureOpenAIError(status_code=500, message=message, body=body)
def streaming(
self,
@@ -656,10 +661,14 @@ class AzureChatCompletion(BaseAzureLLM, BaseLLM):
error_headers = getattr(e, "headers", None)
error_response = getattr(e, "response", None)
message = getattr(e, "message", str(e))
error_body = getattr(e, "body", None)
if error_headers is None and error_response:
error_headers = getattr(error_response, "headers", None)
raise AzureOpenAIError(
status_code=status_code, message=message, headers=error_headers
status_code=status_code,
message=message,
headers=error_headers,
body=error_body,
)
async def aembedding(


@@ -25,6 +25,7 @@ class AzureOpenAIError(BaseLLMException):
request: Optional[httpx.Request] = None,
response: Optional[httpx.Response] = None,
headers: Optional[Union[httpx.Headers, dict]] = None,
body: Optional[dict] = None,
):
super().__init__(
status_code=status_code,
@@ -32,6 +33,7 @@ class AzureOpenAIError(BaseLLMException):
request=request,
response=response,
headers=headers,
body=body,
)


@@ -16,10 +16,23 @@ from litellm.llms.openai.openai import OpenAIConfig
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import ModelResponse, ProviderField
from litellm.utils import _add_path_to_api_base
from litellm.utils import _add_path_to_api_base, supports_tool_choice
class AzureAIStudioConfig(OpenAIConfig):
def get_supported_openai_params(self, model: str) -> List:
model_supports_tool_choice = True # azure ai supports this by default
if not supports_tool_choice(model=f"azure_ai/{model}"):
model_supports_tool_choice = False
supported_params = super().get_supported_openai_params(model)
if not model_supports_tool_choice:
filtered_supported_params = []
for param in supported_params:
if param != "tool_choice":
filtered_supported_params.append(param)
return filtered_supported_params
return supported_params
def validate_environment(
self,
headers: dict,


@@ -51,6 +51,7 @@ class BaseLLMException(Exception):
headers: Optional[Union[dict, httpx.Headers]] = None,
request: Optional[httpx.Request] = None,
response: Optional[httpx.Response] = None,
body: Optional[dict] = None,
):
self.status_code = status_code
self.message: str = message
@@ -67,6 +68,7 @@ class BaseLLMException(Exception):
self.response = httpx.Response(
status_code=status_code, request=self.request
)
self.body = body
super().__init__(
self.message
) # Call the base class constructor with the parameters it needs


@@ -0,0 +1,133 @@
import types
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Dict, Optional, Union
import httpx
from litellm.types.llms.openai import (
ResponseInputParam,
ResponsesAPIOptionalRequestParams,
ResponsesAPIRequestParams,
ResponsesAPIResponse,
ResponsesAPIStreamingResponse,
)
from litellm.types.router import GenericLiteLLMParams
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
from ..chat.transformation import BaseLLMException as _BaseLLMException
LiteLLMLoggingObj = _LiteLLMLoggingObj
BaseLLMException = _BaseLLMException
else:
LiteLLMLoggingObj = Any
BaseLLMException = Any
class BaseResponsesAPIConfig(ABC):
def __init__(self):
pass
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not k.startswith("_abc")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
@abstractmethod
def get_supported_openai_params(self, model: str) -> list:
pass
@abstractmethod
def map_openai_params(
self,
response_api_optional_params: ResponsesAPIOptionalRequestParams,
model: str,
drop_params: bool,
) -> Dict:
pass
@abstractmethod
def validate_environment(
self,
headers: dict,
model: str,
api_key: Optional[str] = None,
) -> dict:
return {}
@abstractmethod
def get_complete_url(
self,
api_base: Optional[str],
model: str,
stream: Optional[bool] = None,
) -> str:
"""
OPTIONAL
Get the complete url for the request
Some providers need `model` in `api_base`
"""
if api_base is None:
raise ValueError("api_base is required")
return api_base
@abstractmethod
def transform_responses_api_request(
self,
model: str,
input: Union[str, ResponseInputParam],
response_api_optional_request_params: Dict,
litellm_params: GenericLiteLLMParams,
headers: dict,
) -> ResponsesAPIRequestParams:
pass
@abstractmethod
def transform_response_api_response(
self,
model: str,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
) -> ResponsesAPIResponse:
pass
@abstractmethod
def transform_streaming_response(
self,
model: str,
parsed_chunk: dict,
logging_obj: LiteLLMLoggingObj,
) -> ResponsesAPIStreamingResponse:
"""
Transform a parsed streaming response chunk into a ResponsesAPIStreamingResponse
"""
pass
def get_error_class(
self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
) -> BaseLLMException:
from ..chat.transformation import BaseLLMException
raise BaseLLMException(
status_code=status_code,
message=error_message,
headers=headers,
)


@@ -1231,7 +1231,9 @@ class AWSEventStreamDecoder:
if len(self.content_blocks) == 0:
return False
if "text" in self.content_blocks[0]:
if (
"toolUse" not in self.content_blocks[0]
): # be explicit - only do this if tool use block, as this is to prevent json decoding errors
return False
for block in self.content_blocks:


@@ -129,7 +129,6 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
## CREDENTIALS ##
# pop aws_secret_access_key, aws_access_key_id, aws_session_token, aws_region_name from kwargs, since completion calls fail with them
extra_headers = optional_params.get("extra_headers", None)
aws_secret_access_key = optional_params.get("aws_secret_access_key", None)
aws_access_key_id = optional_params.get("aws_access_key_id", None)
aws_session_token = optional_params.get("aws_session_token", None)
@@ -155,9 +154,10 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
)
sigv4 = SigV4Auth(credentials, "bedrock", aws_region_name)
if headers is not None:
headers = {"Content-Type": "application/json", **headers}
else:
headers = {"Content-Type": "application/json"}
if extra_headers is not None:
headers = {"Content-Type": "application/json", **extra_headers}
request = AWSRequest(
method="POST",
@@ -166,12 +166,13 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
headers=headers,
)
sigv4.add_auth(request)
if (
extra_headers is not None and "Authorization" in extra_headers
): # prevent sigv4 from overwriting the auth header
request.headers["Authorization"] = extra_headers["Authorization"]
return dict(request.headers)
request_headers_dict = dict(request.headers)
if (
headers is not None and "Authorization" in headers
): # prevent sigv4 from overwriting the auth header
request_headers_dict["Authorization"] = headers["Authorization"]
return request_headers_dict
def transform_request(
self,
@@ -443,7 +444,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> dict:
return {}
return headers
def get_error_class(
self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]


@@ -0,0 +1,106 @@
import types
from typing import List, Optional
from openai.types.image import Image
from litellm.types.llms.bedrock import (
AmazonNovaCanvasTextToImageRequest, AmazonNovaCanvasTextToImageResponse,
AmazonNovaCanvasTextToImageParams, AmazonNovaCanvasRequestBase,
)
from litellm.types.utils import ImageResponse
class AmazonNovaCanvasConfig:
"""
Reference: https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/model-catalog/serverless/amazon.nova-canvas-v1:0
"""
@classmethod
def get_config(cls):
return {
k: v
for k, v in cls.__dict__.items()
if not k.startswith("__")
and not isinstance(
v,
(
types.FunctionType,
types.BuiltinFunctionType,
classmethod,
staticmethod,
),
)
and v is not None
}
@classmethod
def get_supported_openai_params(cls, model: Optional[str] = None) -> List:
"""
"""
return ["n", "size", "quality"]
@classmethod
def _is_nova_model(cls, model: Optional[str] = None) -> bool:
"""
Returns True if the model is a Nova Canvas model
Nova models follow this pattern:
"""
if model:
if "amazon.nova-canvas" in model:
return True
return False
@classmethod
def transform_request_body(
cls, text: str, optional_params: dict
) -> AmazonNovaCanvasRequestBase:
"""
Transform the request body for Amazon Nova Canvas model
"""
task_type = optional_params.pop("taskType", "TEXT_IMAGE")
image_generation_config = optional_params.pop("imageGenerationConfig", {})
image_generation_config = {**image_generation_config, **optional_params}
if task_type == "TEXT_IMAGE":
text_to_image_params = image_generation_config.pop("textToImageParams", {})
text_to_image_params = {"text" :text, **text_to_image_params}
text_to_image_params = AmazonNovaCanvasTextToImageParams(**text_to_image_params)
return AmazonNovaCanvasTextToImageRequest(textToImageParams=text_to_image_params, taskType=task_type,
imageGenerationConfig=image_generation_config)
raise NotImplementedError(f"Task type {task_type} is not supported")
@classmethod
def map_openai_params(cls, non_default_params: dict, optional_params: dict) -> dict:
"""
Map the OpenAI params to the Bedrock params
"""
_size = non_default_params.get("size")
if _size is not None:
width, height = _size.split("x")
optional_params["width"], optional_params["height"] = int(width), int(height)
if non_default_params.get("n") is not None:
optional_params["numberOfImages"] = non_default_params.get("n")
if non_default_params.get("quality") is not None:
if non_default_params.get("quality") in ("hd", "premium"):
optional_params["quality"] = "premium"
if non_default_params.get("quality") == "standard":
optional_params["quality"] = "standard"
return optional_params
@classmethod
def transform_response_dict_to_openai_response(
cls, model_response: ImageResponse, response_dict: dict
) -> ImageResponse:
"""
Transform the response dict to the OpenAI response
"""
nova_response = AmazonNovaCanvasTextToImageResponse(**response_dict)
openai_images: List[Image] = []
for _img in nova_response.get("images", []):
openai_images.append(Image(b64_json=_img))
model_response.data = openai_images
return model_response

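Putting the config above to work, a quick sanity check of how OpenAI-style image params would translate for Nova Canvas (example values; the classmethods are the ones defined in this new file):

```python
from litellm import AmazonNovaCanvasConfig  # exported via litellm/__init__.py in this commit

params = AmazonNovaCanvasConfig.map_openai_params(
    non_default_params={"size": "1024x1024", "n": 2, "quality": "hd"},
    optional_params={},
)
# -> {"width": 1024, "height": 1024, "numberOfImages": 2, "quality": "premium"}

body = AmazonNovaCanvasConfig.transform_request_body(
    text="A cute baby sea otter", optional_params=params
)
# -> (roughly) {"textToImageParams": {"text": "A cute baby sea otter"},
#               "taskType": "TEXT_IMAGE",
#               "imageGenerationConfig": {"width": 1024, "height": 1024,
#                                         "numberOfImages": 2, "quality": "premium"}}
```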

@@ -266,6 +266,8 @@ class BedrockImageGeneration(BaseAWSLLM):
"text_prompts": [{"text": prompt, "weight": 1}],
**inference_params,
}
elif provider == "amazon":
return dict(litellm.AmazonNovaCanvasConfig.transform_request_body(text=prompt, optional_params=optional_params))
else:
raise BedrockError(
status_code=422, message=f"Unsupported model={model}, passed in"
@@ -301,6 +303,7 @@ class BedrockImageGeneration(BaseAWSLLM):
config_class = (
litellm.AmazonStability3Config
if litellm.AmazonStability3Config._is_stability_3_model(model=model)
else litellm.AmazonNovaCanvasConfig if litellm.AmazonNovaCanvasConfig._is_nova_model(model=model)
else litellm.AmazonStabilityConfig
)
config_class.transform_response_dict_to_openai_response(


@@ -1,6 +1,6 @@
import io
import json
from typing import TYPE_CHECKING, Any, Optional, Tuple, Union
from typing import TYPE_CHECKING, Any, Coroutine, Dict, Optional, Tuple, Union
import httpx # type: ignore
@@ -11,13 +11,21 @@ import litellm.types.utils
from litellm.llms.base_llm.chat.transformation import BaseConfig
from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig
from litellm.llms.base_llm.rerank.transformation import BaseRerankConfig
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
_get_httpx_client,
get_async_httpx_client,
)
from litellm.responses.streaming_iterator import (
BaseResponsesAPIStreamingIterator,
ResponsesAPIStreamingIterator,
SyncResponsesAPIStreamingIterator,
)
from litellm.types.llms.openai import ResponseInputParam, ResponsesAPIResponse
from litellm.types.rerank import OptionalRerankParams, RerankResponse
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import EmbeddingResponse, FileTypes, TranscriptionResponse
from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager
@@ -873,7 +881,9 @@ class BaseLLMHTTPHandler:
elif isinstance(audio_file, bytes):
# Assume it's already binary data
binary_data = audio_file
elif isinstance(audio_file, io.BufferedReader) or isinstance(audio_file, io.BytesIO):
elif isinstance(audio_file, io.BufferedReader) or isinstance(
audio_file, io.BytesIO
):
# Handle file-like objects
binary_data = audio_file.read()
@@ -950,8 +960,235 @@ class BaseLLMHTTPHandler:
return returned_response
return model_response
def response_api_handler(
self,
model: str,
input: Union[str, ResponseInputParam],
responses_api_provider_config: BaseResponsesAPIConfig,
response_api_optional_request_params: Dict,
custom_llm_provider: str,
litellm_params: GenericLiteLLMParams,
logging_obj: LiteLLMLoggingObj,
extra_headers: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
_is_async: bool = False,
) -> Union[
ResponsesAPIResponse,
BaseResponsesAPIStreamingIterator,
Coroutine[
Any, Any, Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]
],
]:
"""
Handles responses API requests.
When _is_async=True, returns a coroutine instead of making the call directly.
"""
if _is_async:
# Return the async coroutine if called with _is_async=True
return self.async_response_api_handler(
model=model,
input=input,
responses_api_provider_config=responses_api_provider_config,
response_api_optional_request_params=response_api_optional_request_params,
custom_llm_provider=custom_llm_provider,
litellm_params=litellm_params,
logging_obj=logging_obj,
extra_headers=extra_headers,
extra_body=extra_body,
timeout=timeout,
client=client if isinstance(client, AsyncHTTPHandler) else None,
)
if client is None or not isinstance(client, HTTPHandler):
sync_httpx_client = _get_httpx_client(
params={"ssl_verify": litellm_params.get("ssl_verify", None)}
)
else:
sync_httpx_client = client
headers = responses_api_provider_config.validate_environment(
api_key=litellm_params.api_key,
headers=response_api_optional_request_params.get("extra_headers", {}) or {},
model=model,
)
if extra_headers:
headers.update(extra_headers)
api_base = responses_api_provider_config.get_complete_url(
api_base=litellm_params.api_base,
model=model,
)
data = responses_api_provider_config.transform_responses_api_request(
model=model,
input=input,
response_api_optional_request_params=response_api_optional_request_params,
litellm_params=litellm_params,
headers=headers,
)
## LOGGING
logging_obj.pre_call(
input=input,
api_key="",
additional_args={
"complete_input_dict": data,
"api_base": api_base,
"headers": headers,
},
)
# Check if streaming is requested
stream = response_api_optional_request_params.get("stream", False)
try:
if stream:
# For streaming, use stream=True in the request
response = sync_httpx_client.post(
url=api_base,
headers=headers,
data=json.dumps(data),
timeout=timeout
or response_api_optional_request_params.get("timeout"),
stream=True,
)
return SyncResponsesAPIStreamingIterator(
response=response,
model=model,
logging_obj=logging_obj,
responses_api_provider_config=responses_api_provider_config,
)
else:
# For non-streaming requests
response = sync_httpx_client.post(
url=api_base,
headers=headers,
data=json.dumps(data),
timeout=timeout
or response_api_optional_request_params.get("timeout"),
)
except Exception as e:
raise self._handle_error(
e=e,
provider_config=responses_api_provider_config,
)
return responses_api_provider_config.transform_response_api_response(
model=model,
raw_response=response,
logging_obj=logging_obj,
)
async def async_response_api_handler(
self,
model: str,
input: Union[str, ResponseInputParam],
responses_api_provider_config: BaseResponsesAPIConfig,
response_api_optional_request_params: Dict,
custom_llm_provider: str,
litellm_params: GenericLiteLLMParams,
logging_obj: LiteLLMLoggingObj,
extra_headers: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
"""
Async version of the responses API handler.
Uses async HTTP client to make requests.
"""
if client is None or not isinstance(client, AsyncHTTPHandler):
async_httpx_client = get_async_httpx_client(
llm_provider=litellm.LlmProviders(custom_llm_provider),
params={"ssl_verify": litellm_params.get("ssl_verify", None)},
)
else:
async_httpx_client = client
headers = responses_api_provider_config.validate_environment(
api_key=litellm_params.api_key,
headers=response_api_optional_request_params.get("extra_headers", {}) or {},
model=model,
)
if extra_headers:
headers.update(extra_headers)
api_base = responses_api_provider_config.get_complete_url(
api_base=litellm_params.api_base,
model=model,
)
data = responses_api_provider_config.transform_responses_api_request(
model=model,
input=input,
response_api_optional_request_params=response_api_optional_request_params,
litellm_params=litellm_params,
headers=headers,
)
## LOGGING
logging_obj.pre_call(
input=input,
api_key="",
additional_args={
"complete_input_dict": data,
"api_base": api_base,
"headers": headers,
},
)
# Check if streaming is requested
stream = response_api_optional_request_params.get("stream", False)
try:
if stream:
# For streaming, we need to use stream=True in the request
response = await async_httpx_client.post(
url=api_base,
headers=headers,
data=json.dumps(data),
timeout=timeout
or response_api_optional_request_params.get("timeout"),
stream=True,
)
# Return the streaming iterator
return ResponsesAPIStreamingIterator(
response=response,
model=model,
logging_obj=logging_obj,
responses_api_provider_config=responses_api_provider_config,
)
else:
# For non-streaming requests
response = await async_httpx_client.post(
url=api_base,
headers=headers,
data=json.dumps(data),
timeout=timeout
or response_api_optional_request_params.get("timeout"),
)
except Exception as e:
raise self._handle_error(
e=e,
provider_config=responses_api_provider_config,
)
return responses_api_provider_config.transform_response_api_response(
model=model,
raw_response=response,
logging_obj=logging_obj,
)
def _handle_error( def _handle_error(
self, e: Exception, provider_config: Union[BaseConfig, BaseRerankConfig] self,
e: Exception,
provider_config: Union[BaseConfig, BaseRerankConfig, BaseResponsesAPIConfig],
): ):
status_code = getattr(e, "status_code", 500) status_code = getattr(e, "status_code", 500)
error_headers = getattr(e, "headers", None) error_headers = getattr(e, "headers", None)
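Taken together, the sync and async handlers above follow one dispatch pattern: when _is_async is set, the sync entry point hands back the coroutine for the caller to await; the effective timeout is the explicit argument with a fallback to the per-request optional params; and the stream flag decides between a streaming iterator and a plain transformed response. A small self-contained sketch of that pattern, with illustrative names rather than the real LiteLLM classes:

import asyncio
from typing import Any, Dict, Optional

async def _async_call(data: Dict[str, Any], timeout: Optional[float]) -> str:
    await asyncio.sleep(0)  # stand-in for the real async HTTP POST
    return f"async response (timeout={timeout})"

def handle(
    data: Dict[str, Any],
    optional_params: Dict[str, Any],
    _is_async: bool = False,
    timeout: Optional[float] = None,
):
    # Explicit timeout wins; otherwise fall back to the per-request optional params.
    effective_timeout = timeout or optional_params.get("timeout")
    if _is_async:
        # Return the coroutine itself; the async caller awaits it.
        return _async_call(data, effective_timeout)
    if optional_params.get("stream", False):
        return f"streaming iterator (timeout={effective_timeout})"
    return f"blocking response (timeout={effective_timeout})"

print(handle({}, {"timeout": 30.0}))                  # blocking response (timeout=30.0)
print(handle({}, {"stream": True}, timeout=10.0))     # streaming iterator (timeout=10.0)
print(asyncio.run(handle({}, {}, _is_async=True)))    # async response (timeout=None)

Returning the un-awaited coroutine keeps a single public entry point while still letting async callers drive the event loop themselves.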

View file

@ -19,6 +19,7 @@ class OpenAIError(BaseLLMException):
request: Optional[httpx.Request] = None, request: Optional[httpx.Request] = None,
response: Optional[httpx.Response] = None, response: Optional[httpx.Response] = None,
headers: Optional[Union[dict, httpx.Headers]] = None, headers: Optional[Union[dict, httpx.Headers]] = None,
body: Optional[dict] = None,
): ):
self.status_code = status_code self.status_code = status_code
self.message = message self.message = message
@ -39,6 +40,7 @@ class OpenAIError(BaseLLMException):
headers=self.headers, headers=self.headers,
request=self.request, request=self.request,
response=self.response, response=self.response,
body=body,
) )

View file

@ -732,10 +732,14 @@ class OpenAIChatCompletion(BaseLLM):
error_headers = getattr(e, "headers", None) error_headers = getattr(e, "headers", None)
error_text = getattr(e, "text", str(e)) error_text = getattr(e, "text", str(e))
error_response = getattr(e, "response", None) error_response = getattr(e, "response", None)
error_body = getattr(e, "body", None)
if error_headers is None and error_response: if error_headers is None and error_response:
error_headers = getattr(error_response, "headers", None) error_headers = getattr(error_response, "headers", None)
raise OpenAIError( raise OpenAIError(
status_code=status_code, message=error_text, headers=error_headers status_code=status_code,
message=error_text,
headers=error_headers,
body=error_body,
) )
async def acompletion( async def acompletion(
@ -828,13 +832,17 @@ class OpenAIChatCompletion(BaseLLM):
except Exception as e: except Exception as e:
exception_response = getattr(e, "response", None) exception_response = getattr(e, "response", None)
status_code = getattr(e, "status_code", 500) status_code = getattr(e, "status_code", 500)
exception_body = getattr(e, "body", None)
error_headers = getattr(e, "headers", None) error_headers = getattr(e, "headers", None)
if error_headers is None and exception_response: if error_headers is None and exception_response:
error_headers = getattr(exception_response, "headers", None) error_headers = getattr(exception_response, "headers", None)
message = getattr(e, "message", str(e)) message = getattr(e, "message", str(e))
raise OpenAIError( raise OpenAIError(
status_code=status_code, message=message, headers=error_headers status_code=status_code,
message=message,
headers=error_headers,
body=exception_body,
) )
def streaming( def streaming(
@ -973,6 +981,7 @@ class OpenAIChatCompletion(BaseLLM):
error_headers = getattr(e, "headers", None) error_headers = getattr(e, "headers", None)
status_code = getattr(e, "status_code", 500) status_code = getattr(e, "status_code", 500)
error_response = getattr(e, "response", None) error_response = getattr(e, "response", None)
exception_body = getattr(e, "body", None)
if error_headers is None and error_response: if error_headers is None and error_response:
error_headers = getattr(error_response, "headers", None) error_headers = getattr(error_response, "headers", None)
if response is not None and hasattr(response, "text"): if response is not None and hasattr(response, "text"):
@ -980,6 +989,7 @@ class OpenAIChatCompletion(BaseLLM):
status_code=status_code, status_code=status_code,
message=f"{str(e)}\n\nOriginal Response: {response.text}", # type: ignore message=f"{str(e)}\n\nOriginal Response: {response.text}", # type: ignore
headers=error_headers, headers=error_headers,
body=exception_body,
) )
else: else:
if type(e).__name__ == "ReadTimeout": if type(e).__name__ == "ReadTimeout":
@ -987,16 +997,21 @@ class OpenAIChatCompletion(BaseLLM):
status_code=408, status_code=408,
message=f"{type(e).__name__}", message=f"{type(e).__name__}",
headers=error_headers, headers=error_headers,
body=exception_body,
) )
elif hasattr(e, "status_code"): elif hasattr(e, "status_code"):
raise OpenAIError( raise OpenAIError(
status_code=getattr(e, "status_code", 500), status_code=getattr(e, "status_code", 500),
message=str(e), message=str(e),
headers=error_headers, headers=error_headers,
body=exception_body,
) )
else: else:
raise OpenAIError( raise OpenAIError(
status_code=500, message=f"{str(e)}", headers=error_headers status_code=500,
message=f"{str(e)}",
headers=error_headers,
body=exception_body,
) )
def get_stream_options( def get_stream_options(
@ -2635,7 +2650,7 @@ class OpenAIAssistantsAPI(BaseLLM):
assistant_id: str, assistant_id: str,
additional_instructions: Optional[str], additional_instructions: Optional[str],
instructions: Optional[str], instructions: Optional[str],
metadata: Optional[object], metadata: Optional[Dict],
model: Optional[str], model: Optional[str],
stream: Optional[bool], stream: Optional[bool],
tools: Optional[Iterable[AssistantToolParam]], tools: Optional[Iterable[AssistantToolParam]],
@ -2674,12 +2689,12 @@ class OpenAIAssistantsAPI(BaseLLM):
assistant_id: str, assistant_id: str,
additional_instructions: Optional[str], additional_instructions: Optional[str],
instructions: Optional[str], instructions: Optional[str],
metadata: Optional[object], metadata: Optional[Dict],
model: Optional[str], model: Optional[str],
tools: Optional[Iterable[AssistantToolParam]], tools: Optional[Iterable[AssistantToolParam]],
event_handler: Optional[AssistantEventHandler], event_handler: Optional[AssistantEventHandler],
) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
data = { data: Dict[str, Any] = {
"thread_id": thread_id, "thread_id": thread_id,
"assistant_id": assistant_id, "assistant_id": assistant_id,
"additional_instructions": additional_instructions, "additional_instructions": additional_instructions,
@ -2699,12 +2714,12 @@ class OpenAIAssistantsAPI(BaseLLM):
assistant_id: str, assistant_id: str,
additional_instructions: Optional[str], additional_instructions: Optional[str],
instructions: Optional[str], instructions: Optional[str],
metadata: Optional[object], metadata: Optional[Dict],
model: Optional[str], model: Optional[str],
tools: Optional[Iterable[AssistantToolParam]], tools: Optional[Iterable[AssistantToolParam]],
event_handler: Optional[AssistantEventHandler], event_handler: Optional[AssistantEventHandler],
) -> AssistantStreamManager[AssistantEventHandler]: ) -> AssistantStreamManager[AssistantEventHandler]:
data = { data: Dict[str, Any] = {
"thread_id": thread_id, "thread_id": thread_id,
"assistant_id": assistant_id, "assistant_id": assistant_id,
"additional_instructions": additional_instructions, "additional_instructions": additional_instructions,
@ -2726,7 +2741,7 @@ class OpenAIAssistantsAPI(BaseLLM):
assistant_id: str, assistant_id: str,
additional_instructions: Optional[str], additional_instructions: Optional[str],
instructions: Optional[str], instructions: Optional[str],
metadata: Optional[object], metadata: Optional[Dict],
model: Optional[str], model: Optional[str],
stream: Optional[bool], stream: Optional[bool],
tools: Optional[Iterable[AssistantToolParam]], tools: Optional[Iterable[AssistantToolParam]],
@ -2748,7 +2763,7 @@ class OpenAIAssistantsAPI(BaseLLM):
assistant_id: str, assistant_id: str,
additional_instructions: Optional[str], additional_instructions: Optional[str],
instructions: Optional[str], instructions: Optional[str],
metadata: Optional[object], metadata: Optional[Dict],
model: Optional[str], model: Optional[str],
stream: Optional[bool], stream: Optional[bool],
tools: Optional[Iterable[AssistantToolParam]], tools: Optional[Iterable[AssistantToolParam]],
@ -2771,7 +2786,7 @@ class OpenAIAssistantsAPI(BaseLLM):
assistant_id: str, assistant_id: str,
additional_instructions: Optional[str], additional_instructions: Optional[str],
instructions: Optional[str], instructions: Optional[str],
metadata: Optional[object], metadata: Optional[Dict],
model: Optional[str], model: Optional[str],
stream: Optional[bool], stream: Optional[bool],
tools: Optional[Iterable[AssistantToolParam]], tools: Optional[Iterable[AssistantToolParam]],
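The recurring change in this file is that the provider's structured error payload (getattr(e, "body", None)) now rides along on the raised OpenAIError instead of being dropped, so callers can inspect the original error object and not just its text. A minimal sketch of that propagation pattern with stand-in exception classes (illustrative only, not the real OpenAIError/BaseLLMException):

from typing import Optional

class UpstreamError(Exception):
    # Stand-in for an exception class that keeps the provider's structured error body.
    def __init__(self, status_code: int, message: str, body: Optional[dict] = None):
        super().__init__(message)
        self.status_code = status_code
        self.body = body

def raise_from_provider_error(e: Exception) -> None:
    # Mirror of the pattern in the diff: pull the structured body off the SDK error, if present.
    error_body = getattr(e, "body", None)
    raise UpstreamError(
        status_code=getattr(e, "status_code", 500),
        message=getattr(e, "message", str(e)),
        body=error_body,
    )

class FakeSDKError(Exception):
    status_code = 429
    message = "rate limited"
    body = {"error": {"code": "rate_limit_exceeded"}}

try:
    raise_from_provider_error(FakeSDKError())
except UpstreamError as err:
    print(err.status_code, err.body)  # 429 {'error': {'code': 'rate_limit_exceeded'}}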

View file

@ -0,0 +1,190 @@
from typing import TYPE_CHECKING, Any, Dict, Optional, Union, cast
import httpx
import litellm
from litellm._logging import verbose_logger
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import *
from litellm.types.router import GenericLiteLLMParams
from ..common_utils import OpenAIError
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
LiteLLMLoggingObj = _LiteLLMLoggingObj
else:
LiteLLMLoggingObj = Any
class OpenAIResponsesAPIConfig(BaseResponsesAPIConfig):
def get_supported_openai_params(self, model: str) -> list:
"""
All OpenAI Responses API params are supported
"""
return [
"input",
"model",
"include",
"instructions",
"max_output_tokens",
"metadata",
"parallel_tool_calls",
"previous_response_id",
"reasoning",
"store",
"stream",
"temperature",
"text",
"tool_choice",
"tools",
"top_p",
"truncation",
"user",
"extra_headers",
"extra_query",
"extra_body",
"timeout",
]
def map_openai_params(
self,
response_api_optional_params: ResponsesAPIOptionalRequestParams,
model: str,
drop_params: bool,
) -> Dict:
"""No mapping applied since inputs are in OpenAI spec already"""
return dict(response_api_optional_params)
def transform_responses_api_request(
self,
model: str,
input: Union[str, ResponseInputParam],
response_api_optional_request_params: Dict,
litellm_params: GenericLiteLLMParams,
headers: dict,
) -> ResponsesAPIRequestParams:
"""No transform applied since inputs are in OpenAI spec already"""
return ResponsesAPIRequestParams(
model=model, input=input, **response_api_optional_request_params
)
def transform_response_api_response(
self,
model: str,
raw_response: httpx.Response,
logging_obj: LiteLLMLoggingObj,
) -> ResponsesAPIResponse:
"""No transform applied since outputs are in OpenAI spec already"""
try:
raw_response_json = raw_response.json()
except Exception:
raise OpenAIError(
message=raw_response.text, status_code=raw_response.status_code
)
return ResponsesAPIResponse(**raw_response_json)
def validate_environment(
self,
headers: dict,
model: str,
api_key: Optional[str] = None,
) -> dict:
api_key = (
api_key
or litellm.api_key
or litellm.openai_key
or get_secret_str("OPENAI_API_KEY")
)
headers.update(
{
"Authorization": f"Bearer {api_key}",
}
)
return headers
def get_complete_url(
self,
api_base: Optional[str],
model: str,
stream: Optional[bool] = None,
) -> str:
"""
Get the endpoint for OpenAI responses API
"""
api_base = (
api_base
or litellm.api_base
or get_secret_str("OPENAI_API_BASE")
or "https://api.openai.com/v1"
)
# Remove trailing slashes
api_base = api_base.rstrip("/")
return f"{api_base}/responses"
def transform_streaming_response(
self,
model: str,
parsed_chunk: dict,
logging_obj: LiteLLMLoggingObj,
) -> ResponsesAPIStreamingResponse:
"""
Transform a parsed streaming response chunk into a ResponsesAPIStreamingResponse
"""
# Convert the dictionary to a properly typed ResponsesAPIStreamingResponse
verbose_logger.debug("Raw OpenAI Chunk=%s", parsed_chunk)
event_type = str(parsed_chunk.get("type"))
event_pydantic_model = OpenAIResponsesAPIConfig.get_event_model_class(
event_type=event_type
)
return event_pydantic_model(**parsed_chunk)
@staticmethod
def get_event_model_class(event_type: str) -> Any:
"""
Returns the appropriate event model class based on the event type.
Args:
event_type (str): The type of event from the response chunk
Returns:
Any: The corresponding event model class
Raises:
ValueError: If the event type is unknown
"""
event_models = {
ResponsesAPIStreamEvents.RESPONSE_CREATED: ResponseCreatedEvent,
ResponsesAPIStreamEvents.RESPONSE_IN_PROGRESS: ResponseInProgressEvent,
ResponsesAPIStreamEvents.RESPONSE_COMPLETED: ResponseCompletedEvent,
ResponsesAPIStreamEvents.RESPONSE_FAILED: ResponseFailedEvent,
ResponsesAPIStreamEvents.RESPONSE_INCOMPLETE: ResponseIncompleteEvent,
ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED: OutputItemAddedEvent,
ResponsesAPIStreamEvents.OUTPUT_ITEM_DONE: OutputItemDoneEvent,
ResponsesAPIStreamEvents.CONTENT_PART_ADDED: ContentPartAddedEvent,
ResponsesAPIStreamEvents.CONTENT_PART_DONE: ContentPartDoneEvent,
ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA: OutputTextDeltaEvent,
ResponsesAPIStreamEvents.OUTPUT_TEXT_ANNOTATION_ADDED: OutputTextAnnotationAddedEvent,
ResponsesAPIStreamEvents.OUTPUT_TEXT_DONE: OutputTextDoneEvent,
ResponsesAPIStreamEvents.REFUSAL_DELTA: RefusalDeltaEvent,
ResponsesAPIStreamEvents.REFUSAL_DONE: RefusalDoneEvent,
ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA: FunctionCallArgumentsDeltaEvent,
ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DONE: FunctionCallArgumentsDoneEvent,
ResponsesAPIStreamEvents.FILE_SEARCH_CALL_IN_PROGRESS: FileSearchCallInProgressEvent,
ResponsesAPIStreamEvents.FILE_SEARCH_CALL_SEARCHING: FileSearchCallSearchingEvent,
ResponsesAPIStreamEvents.FILE_SEARCH_CALL_COMPLETED: FileSearchCallCompletedEvent,
ResponsesAPIStreamEvents.WEB_SEARCH_CALL_IN_PROGRESS: WebSearchCallInProgressEvent,
ResponsesAPIStreamEvents.WEB_SEARCH_CALL_SEARCHING: WebSearchCallSearchingEvent,
ResponsesAPIStreamEvents.WEB_SEARCH_CALL_COMPLETED: WebSearchCallCompletedEvent,
ResponsesAPIStreamEvents.ERROR: ErrorEvent,
}
model_class = event_models.get(cast(ResponsesAPIStreamEvents, event_type))
if not model_class:
raise ValueError(f"Unknown event type: {event_type}")
return model_class
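Two notes on the config above: with no api_base override, get_complete_url resolves to https://api.openai.com/v1/responses, and transform_streaming_response is a plain dictionary dispatch keyed on the chunk's "type" field. A self-contained sketch of that dispatch pattern with made-up event classes (the real ones come from litellm.types.llms.openai):

from dataclasses import dataclass

@dataclass
class ResponseCreated:
    type: str

@dataclass
class OutputTextDelta:
    type: str
    delta: str

# Illustrative subset of the event-type -> model-class table used above.
EVENT_MODELS = {
    "response.created": ResponseCreated,
    "response.output_text.delta": OutputTextDelta,
}

def parse_event(parsed_chunk: dict):
    model_class = EVENT_MODELS.get(parsed_chunk.get("type"))
    if not model_class:
        raise ValueError(f"Unknown event type: {parsed_chunk.get('type')}")
    return model_class(**parsed_chunk)

print(parse_event({"type": "response.output_text.delta", "delta": "Hel"}))
# OutputTextDelta(type='response.output_text.delta', delta='Hel')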

View file

@ -3910,42 +3910,19 @@ async def atext_completion(
ctx = contextvars.copy_context() ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func) func_with_context = partial(ctx.run, func)
_, custom_llm_provider, _, _ = get_llm_provider( init_response = await loop.run_in_executor(None, func_with_context)
model=model, api_base=kwargs.get("api_base", None) if isinstance(init_response, dict) or isinstance(
) init_response, TextCompletionResponse
): ## CACHING SCENARIO
if ( if isinstance(init_response, dict):
custom_llm_provider == "openai" response = TextCompletionResponse(**init_response)
or custom_llm_provider == "azure"
or custom_llm_provider == "azure_text"
or custom_llm_provider == "custom_openai"
or custom_llm_provider == "anyscale"
or custom_llm_provider == "mistral"
or custom_llm_provider == "openrouter"
or custom_llm_provider == "deepinfra"
or custom_llm_provider == "perplexity"
or custom_llm_provider == "groq"
or custom_llm_provider == "nvidia_nim"
or custom_llm_provider == "cerebras"
or custom_llm_provider == "sambanova"
or custom_llm_provider == "ai21_chat"
or custom_llm_provider == "ai21"
or custom_llm_provider == "volcengine"
or custom_llm_provider == "text-completion-codestral"
or custom_llm_provider == "deepseek"
or custom_llm_provider == "text-completion-openai"
or custom_llm_provider == "huggingface"
or custom_llm_provider == "ollama"
or custom_llm_provider == "vertex_ai"
or custom_llm_provider in litellm.openai_compatible_providers
): # currently implemented aiohttp calls for just azure and openai, soon all.
# Await normally
response = await loop.run_in_executor(None, func_with_context)
if asyncio.iscoroutine(response):
response = await response
else: else:
# Call the synchronous function using run_in_executor response = init_response
response = await loop.run_in_executor(None, func_with_context) elif asyncio.iscoroutine(init_response):
response = await init_response
else:
response = init_response # type: ignore
if ( if (
kwargs.get("stream", False) is True kwargs.get("stream", False) is True
or isinstance(response, TextCompletionStreamWrapper) or isinstance(response, TextCompletionStreamWrapper)
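The rewritten block above normalizes whatever run_in_executor hands back: a cached dict is rebuilt into a TextCompletionResponse, an already-built response passes through, and a coroutine is awaited. A standalone sketch of that normalization with a stand-in response type:

import asyncio
from dataclasses import dataclass

@dataclass
class StubResponse:
    text: str

async def normalize(init_response):
    if isinstance(init_response, dict):
        # Caching scenario: rebuild the response object from the cached dict.
        return StubResponse(**init_response)
    if asyncio.iscoroutine(init_response):
        # The real call still has to run; await it.
        return await init_response
    # Already a usable response object.
    return init_response

async def _live_call():
    return StubResponse(text="fresh")

print(asyncio.run(normalize({"text": "cached"})))  # StubResponse(text='cached')
print(asyncio.run(normalize(_live_call())))        # StubResponse(text='fresh')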

View file

@ -6,7 +6,7 @@
"input_cost_per_token": 0.0000, "input_cost_per_token": 0.0000,
"output_cost_per_token": 0.000, "output_cost_per_token": 0.000,
"litellm_provider": "one of https://docs.litellm.ai/docs/providers", "litellm_provider": "one of https://docs.litellm.ai/docs/providers",
"mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech", "mode": "one of: chat, embedding, completion, image_generation, audio_transcription, audio_speech, image_generation, moderation, rerank",
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_vision": true, "supports_vision": true,
@ -931,7 +931,7 @@
"input_cost_per_token": 0.000000, "input_cost_per_token": 0.000000,
"output_cost_per_token": 0.000000, "output_cost_per_token": 0.000000,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "moderations" "mode": "moderation"
}, },
"text-moderation-007": { "text-moderation-007": {
"max_tokens": 32768, "max_tokens": 32768,
@ -940,7 +940,7 @@
"input_cost_per_token": 0.000000, "input_cost_per_token": 0.000000,
"output_cost_per_token": 0.000000, "output_cost_per_token": 0.000000,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "moderations" "mode": "moderation"
}, },
"text-moderation-latest": { "text-moderation-latest": {
"max_tokens": 32768, "max_tokens": 32768,
@ -949,7 +949,7 @@
"input_cost_per_token": 0.000000, "input_cost_per_token": 0.000000,
"output_cost_per_token": 0.000000, "output_cost_per_token": 0.000000,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "moderations" "mode": "moderation"
}, },
"256-x-256/dall-e-2": { "256-x-256/dall-e-2": {
"mode": "image_generation", "mode": "image_generation",
@ -1625,13 +1625,23 @@
"max_tokens": 8192, "max_tokens": 8192,
"max_input_tokens": 128000, "max_input_tokens": 128000,
"max_output_tokens": 8192, "max_output_tokens": 8192,
"input_cost_per_token": 0.0, "input_cost_per_token": 0.00000135,
"input_cost_per_token_cache_hit": 0.0, "output_cost_per_token": 0.0000054,
"output_cost_per_token": 0.0,
"litellm_provider": "azure_ai", "litellm_provider": "azure_ai",
"mode": "chat", "mode": "chat",
"supports_prompt_caching": true, "supports_tool_choice": true,
"supports_tool_choice": true "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/deepseek-r1-improved-performance-higher-limits-and-transparent-pricing/4386367"
},
"azure_ai/deepseek-v3": {
"max_tokens": 8192,
"max_input_tokens": 128000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.00000114,
"output_cost_per_token": 0.00000456,
"litellm_provider": "azure_ai",
"mode": "chat",
"supports_tool_choice": true,
"source": "https://techcommunity.microsoft.com/blog/machinelearningblog/announcing-deepseek-v3-on-azure-ai-foundry-and-github/4390438"
}, },
"azure_ai/jamba-instruct": { "azure_ai/jamba-instruct": {
"max_tokens": 4096, "max_tokens": 4096,
@ -1643,6 +1653,17 @@
"mode": "chat", "mode": "chat",
"supports_tool_choice": true "supports_tool_choice": true
}, },
"azure_ai/mistral-nemo": {
"max_tokens": 4096,
"max_input_tokens": 131072,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000015,
"litellm_provider": "azure_ai",
"mode": "chat",
"supports_function_calling": true,
"source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-nemo-12b-2407?tab=PlansAndPrice"
},
"azure_ai/mistral-large": { "azure_ai/mistral-large": {
"max_tokens": 8191, "max_tokens": 8191,
"max_input_tokens": 32000, "max_input_tokens": 32000,
@ -1770,10 +1791,34 @@
"source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice", "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice",
"supports_tool_choice": true "supports_tool_choice": true
}, },
"azure_ai/Phi-4": { "azure_ai/Phi-4-mini-instruct": {
"max_tokens": 4096, "max_tokens": 4096,
"max_input_tokens": 128000, "max_input_tokens": 131072,
"max_output_tokens": 4096, "max_output_tokens": 4096,
"input_cost_per_token": 0,
"output_cost_per_token": 0,
"litellm_provider": "azure_ai",
"mode": "chat",
"supports_function_calling": true,
"source": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/models-featured#microsoft"
},
"azure_ai/Phi-4-multimodal-instruct": {
"max_tokens": 4096,
"max_input_tokens": 131072,
"max_output_tokens": 4096,
"input_cost_per_token": 0,
"output_cost_per_token": 0,
"litellm_provider": "azure_ai",
"mode": "chat",
"supports_audio_input": true,
"supports_function_calling": true,
"supports_vision": true,
"source": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/models-featured#microsoft"
},
"azure_ai/Phi-4": {
"max_tokens": 16384,
"max_input_tokens": 16384,
"max_output_tokens": 16384,
"input_cost_per_token": 0.000000125, "input_cost_per_token": 0.000000125,
"output_cost_per_token": 0.0000005, "output_cost_per_token": 0.0000005,
"litellm_provider": "azure_ai", "litellm_provider": "azure_ai",
@ -3892,31 +3937,6 @@
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
"supports_tool_choice": true "supports_tool_choice": true
}, },
"gemini/gemini-2.0-flash": {
"max_tokens": 8192,
"max_input_tokens": 1048576,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_audio_token": 0.0000007,
"input_cost_per_token": 0.0000001,
"output_cost_per_token": 0.0000004,
"litellm_provider": "gemini",
"mode": "chat",
"rpm": 10000,
"tpm": 10000000,
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": true,
"supports_tool_choice": true,
"source": "https://ai.google.dev/pricing#2_0flash"
},
"gemini-2.0-flash-001": { "gemini-2.0-flash-001": {
"max_tokens": 8192, "max_tokens": 8192,
"max_input_tokens": 1048576, "max_input_tokens": 1048576,
@ -4008,6 +4028,69 @@
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
"supports_tool_choice": true "supports_tool_choice": true
}, },
"gemini/gemini-2.0-pro-exp-02-05": {
"max_tokens": 8192,
"max_input_tokens": 2097152,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0,
"input_cost_per_video_per_second": 0,
"input_cost_per_audio_per_second": 0,
"input_cost_per_token": 0,
"input_cost_per_character": 0,
"input_cost_per_token_above_128k_tokens": 0,
"input_cost_per_character_above_128k_tokens": 0,
"input_cost_per_image_above_128k_tokens": 0,
"input_cost_per_video_per_second_above_128k_tokens": 0,
"input_cost_per_audio_per_second_above_128k_tokens": 0,
"output_cost_per_token": 0,
"output_cost_per_character": 0,
"output_cost_per_token_above_128k_tokens": 0,
"output_cost_per_character_above_128k_tokens": 0,
"litellm_provider": "gemini",
"mode": "chat",
"rpm": 2,
"tpm": 1000000,
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_audio_input": true,
"supports_video_input": true,
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"gemini/gemini-2.0-flash": {
"max_tokens": 8192,
"max_input_tokens": 1048576,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_audio_token": 0.0000007,
"input_cost_per_token": 0.0000001,
"output_cost_per_token": 0.0000004,
"litellm_provider": "gemini",
"mode": "chat",
"rpm": 10000,
"tpm": 10000000,
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": true,
"supports_tool_choice": true,
"source": "https://ai.google.dev/pricing#2_0flash"
},
"gemini/gemini-2.0-flash-001": { "gemini/gemini-2.0-flash-001": {
"max_tokens": 8192, "max_tokens": 8192,
"max_input_tokens": 1048576, "max_input_tokens": 1048576,
@ -4511,6 +4594,12 @@
"mode": "image_generation", "mode": "image_generation",
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
}, },
"vertex_ai/imagen-3.0-generate-002": {
"output_cost_per_image": 0.04,
"litellm_provider": "vertex_ai-image-models",
"mode": "image_generation",
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"vertex_ai/imagen-3.0-generate-001": { "vertex_ai/imagen-3.0-generate-001": {
"output_cost_per_image": 0.04, "output_cost_per_image": 0.04,
"litellm_provider": "vertex_ai-image-models", "litellm_provider": "vertex_ai-image-models",
@ -6547,6 +6636,12 @@
"supports_prompt_caching": true, "supports_prompt_caching": true,
"supports_response_schema": true "supports_response_schema": true
}, },
"1024-x-1024/50-steps/bedrock/amazon.nova-canvas-v1:0": {
"max_input_tokens": 2600,
"output_cost_per_image": 0.06,
"litellm_provider": "bedrock",
"mode": "image_generation"
},
"eu.amazon.nova-pro-v1:0": { "eu.amazon.nova-pro-v1:0": {
"max_tokens": 4096, "max_tokens": 4096,
"max_input_tokens": 300000, "max_input_tokens": 300000,
@ -7477,6 +7572,18 @@
"litellm_provider": "bedrock", "litellm_provider": "bedrock",
"mode": "embedding" "mode": "embedding"
}, },
"us.deepseek.r1-v1:0": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000135,
"output_cost_per_token": 0.0000054,
"litellm_provider": "bedrock_converse",
"mode": "chat",
"supports_function_calling": false,
"supports_tool_choice": false
},
"meta.llama3-3-70b-instruct-v1:0": { "meta.llama3-3-70b-instruct-v1:0": {
"max_tokens": 4096, "max_tokens": 4096,
"max_input_tokens": 128000, "max_input_tokens": 128000,


View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/f41c66e22715ab00.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[92222,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"441\",\"static/chunks/441-79926bf2b9d89e04.js\",\"261\",\"static/chunks/261-e48c2ac6ff0b811c.js\",\"899\",\"static/chunks/899-354f59ecde307dfa.js\",\"914\",\"static/chunks/914-e17acab83d0eadb5.js\",\"250\",\"static/chunks/250-51513f2f6dabf571.js\",\"699\",\"static/chunks/699-6b82f8e7b98ca1a3.js\",\"931\",\"static/chunks/app/page-b36633214e76cfd1.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"rCxUxULLkHhl5KoPY9DHv\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f41c66e22715ab00.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html> <!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" 
async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/b6d997482399c7e1.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[62177,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"441\",\"static/chunks/441-79926bf2b9d89e04.js\",\"261\",\"static/chunks/261-cb27c20c4f8ec4c6.js\",\"899\",\"static/chunks/899-354f59ecde307dfa.js\",\"157\",\"static/chunks/157-cf7bc8b3ae1b80ba.js\",\"250\",\"static/chunks/250-51513f2f6dabf571.js\",\"699\",\"static/chunks/699-6b82f8e7b98ca1a3.js\",\"931\",\"static/chunks/app/page-a25b75c267486fe2.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"i92Qc9kkJSCtCgV3DDmdu\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/b6d997482399c7e1.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be 
found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"] 2:I[19107,[],"ClientPageRoot"]
3:I[92222,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-e48c2ac6ff0b811c.js","899","static/chunks/899-354f59ecde307dfa.js","914","static/chunks/914-e17acab83d0eadb5.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","931","static/chunks/app/page-b36633214e76cfd1.js"],"default",1] 3:I[62177,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","899","static/chunks/899-354f59ecde307dfa.js","157","static/chunks/157-cf7bc8b3ae1b80ba.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","931","static/chunks/app/page-a25b75c267486fe2.js"],"default",1]
4:I[4707,[],""] 4:I[4707,[],""]
5:I[36423,[],""] 5:I[36423,[],""]
0:["rCxUxULLkHhl5KoPY9DHv",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]] 0:["i92Qc9kkJSCtCgV3DDmdu",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/b6d997482399c7e1.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"] 2:I[19107,[],"ClientPageRoot"]
3:I[52829,["441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-e48c2ac6ff0b811c.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","418","static/chunks/app/model_hub/page-6f97b95f1023b0e9.js"],"default",1] 3:I[52829,["441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","418","static/chunks/app/model_hub/page-6f97b95f1023b0e9.js"],"default",1]
4:I[4707,[],""] 4:I[4707,[],""]
5:I[36423,[],""] 5:I[36423,[],""]
0:["rCxUxULLkHhl5KoPY9DHv",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]] 
0:["i92Qc9kkJSCtCgV3DDmdu",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/b6d997482399c7e1.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@ -2,6 +2,6 @@
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","441","static/chunks/441-79926bf2b9d89e04.js","899","static/chunks/899-354f59ecde307dfa.js","250","static/chunks/250-51513f2f6dabf571.js","461","static/chunks/app/onboarding/page-a31bc08c35f01c0a.js"],"default",1] 3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","441","static/chunks/441-79926bf2b9d89e04.js","899","static/chunks/899-354f59ecde307dfa.js","250","static/chunks/250-51513f2f6dabf571.js","461","static/chunks/app/onboarding/page-a31bc08c35f01c0a.js"],"default",1]
4:I[4707,[],""] 4:I[4707,[],""]
5:I[36423,[],""] 5:I[36423,[],""]
0:["rCxUxULLkHhl5KoPY9DHv",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]] 
0:["i92Qc9kkJSCtCgV3DDmdu",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/b6d997482399c7e1.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@ -1,13 +1,5 @@
model_list: model_list:
- model_name: gpt-4o - model_name: amazon.nova-canvas-v1:0
litellm_params: litellm_params:
model: azure/gpt-4o model: bedrock/amazon.nova-canvas-v1:0
litellm_credential_name: default_azure_credential aws_region_name: "us-east-1"
credential_list:
- credential_name: default_azure_credential
credential_values:
api_key: os.environ/AZURE_API_KEY
api_base: os.environ/AZURE_API_BASE
credential_info:
description: "Default Azure credential"

View file

@ -1994,13 +1994,14 @@ class ProxyException(Exception):
message: str, message: str,
type: str, type: str,
param: Optional[str], param: Optional[str],
code: Optional[Union[int, str]] = None, code: Optional[Union[int, str]] = None, # maps to status code
headers: Optional[Dict[str, str]] = None, headers: Optional[Dict[str, str]] = None,
openai_code: Optional[str] = None, # maps to 'code' in openai
): ):
self.message = str(message) self.message = str(message)
self.type = type self.type = type
self.param = param self.param = param
self.openai_code = openai_code or code
# If we look on official python OpenAI lib, the code should be a string: # If we look on official python OpenAI lib, the code should be a string:
# https://github.com/openai/openai-python/blob/195c05a64d39c87b2dfdf1eca2d339597f1fce03/src/openai/types/shared/error_object.py#L11 # https://github.com/openai/openai-python/blob/195c05a64d39c87b2dfdf1eca2d339597f1fce03/src/openai/types/shared/error_object.py#L11
# Related LiteLLM issue: https://github.com/BerriAI/litellm/discussions/4834 # Related LiteLLM issue: https://github.com/BerriAI/litellm/discussions/4834
@ -2054,6 +2055,7 @@ class ProxyErrorTypes(str, enum.Enum):
budget_exceeded = "budget_exceeded" budget_exceeded = "budget_exceeded"
key_model_access_denied = "key_model_access_denied" key_model_access_denied = "key_model_access_denied"
team_model_access_denied = "team_model_access_denied" team_model_access_denied = "team_model_access_denied"
user_model_access_denied = "user_model_access_denied"
expired_key = "expired_key" expired_key = "expired_key"
auth_error = "auth_error" auth_error = "auth_error"
internal_server_error = "internal_server_error" internal_server_error = "internal_server_error"
@ -2062,6 +2064,20 @@ class ProxyErrorTypes(str, enum.Enum):
validation_error = "bad_request_error" validation_error = "bad_request_error"
cache_ping_error = "cache_ping_error" cache_ping_error = "cache_ping_error"
@classmethod
def get_model_access_error_type_for_object(
cls, object_type: Literal["key", "user", "team"]
) -> "ProxyErrorTypes":
"""
Get the model access error type for object_type
"""
if object_type == "key":
return cls.key_model_access_denied
elif object_type == "team":
return cls.team_model_access_denied
elif object_type == "user":
return cls.user_model_access_denied
DB_CONNECTION_ERROR_TYPES = (httpx.ConnectError, httpx.ReadError, httpx.ReadTimeout) DB_CONNECTION_ERROR_TYPES = (httpx.ConnectError, httpx.ReadError, httpx.ReadTimeout)
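Illustrative only, assuming ProxyErrorTypes is importable from litellm.proxy._types: the new classmethod lets callers derive the right access-denied error type from the object they are checking.

from litellm.proxy._types import ProxyErrorTypes

# The object type ("key", "team" or "user") picks the matching enum member.
assert (
    ProxyErrorTypes.get_model_access_error_type_for_object(object_type="team")
    == ProxyErrorTypes.team_model_access_denied
)
assert (
    ProxyErrorTypes.get_model_access_error_type_for_object(object_type="user")
    == ProxyErrorTypes.user_model_access_denied
)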
@ -2283,6 +2299,7 @@ class SpecialHeaders(enum.Enum):
azure_authorization = "API-Key" azure_authorization = "API-Key"
anthropic_authorization = "x-api-key" anthropic_authorization = "x-api-key"
google_ai_studio_authorization = "x-goog-api-key" google_ai_studio_authorization = "x-goog-api-key"
azure_apim_authorization = "Ocp-Apim-Subscription-Key"
class LitellmDataForBackendLLMCall(TypedDict, total=False): class LitellmDataForBackendLLMCall(TypedDict, total=False):

View file

@ -98,11 +98,18 @@ async def common_checks(
) )
# 2. If team can call model # 2. If team can call model
_team_model_access_check( if _model and team_object:
team_object=team_object, if not await can_team_access_model(
model=_model, model=_model,
team_object=team_object,
llm_router=llm_router, llm_router=llm_router,
team_model_aliases=valid_token.team_model_aliases if valid_token else None, team_model_aliases=valid_token.team_model_aliases if valid_token else None,
):
raise ProxyException(
message=f"Team not allowed to access model. Team={team_object.team_id}, Model={_model}. Allowed team models = {team_object.models}",
type=ProxyErrorTypes.team_model_access_denied,
param="model",
code=status.HTTP_401_UNAUTHORIZED,
) )
## 2.1 If user can call model (if personal key) ## 2.1 If user can call model (if personal key)
@ -971,10 +978,18 @@ async def _can_object_call_model(
llm_router: Optional[Router], llm_router: Optional[Router],
models: List[str], models: List[str],
team_model_aliases: Optional[Dict[str, str]] = None, team_model_aliases: Optional[Dict[str, str]] = None,
object_type: Literal["user", "team", "key"] = "user",
) -> Literal[True]: ) -> Literal[True]:
""" """
Checks if token can call a given model Checks if token can call a given model
Args:
- model: str
- llm_router: Optional[Router]
- models: List[str]
- team_model_aliases: Optional[Dict[str, str]]
- object_type: Literal["user", "team", "key"]. We use the object type to raise the correct exception type
Returns: Returns:
- True: if token allowed to call model - True: if token allowed to call model
@ -1018,10 +1033,15 @@ async def _can_object_call_model(
if (len(filtered_models) == 0 and len(models) == 0) or "*" in filtered_models: if (len(filtered_models) == 0 and len(models) == 0) or "*" in filtered_models:
all_model_access = True all_model_access = True
if SpecialModelNames.all_proxy_models.value in filtered_models:
all_model_access = True
if model is not None and model not in filtered_models and all_model_access is False: if model is not None and model not in filtered_models and all_model_access is False:
raise ProxyException( raise ProxyException(
message=f"API Key not allowed to access model. This token can only access models={models}. Tried to access {model}", message=f"{object_type} not allowed to access model. This {object_type} can only access models={models}. Tried to access {model}",
type=ProxyErrorTypes.key_model_access_denied, type=ProxyErrorTypes.get_model_access_error_type_for_object(
object_type=object_type
),
param="model", param="model",
code=status.HTTP_401_UNAUTHORIZED, code=status.HTTP_401_UNAUTHORIZED,
) )
@ -1072,6 +1092,26 @@ async def can_key_call_model(
llm_router=llm_router, llm_router=llm_router,
models=valid_token.models, models=valid_token.models,
team_model_aliases=valid_token.team_model_aliases, team_model_aliases=valid_token.team_model_aliases,
object_type="key",
)
async def can_team_access_model(
model: str,
team_object: Optional[LiteLLM_TeamTable],
llm_router: Optional[Router],
team_model_aliases: Optional[Dict[str, str]] = None,
) -> Literal[True]:
"""
Returns True if the team can access a specific model.
"""
return await _can_object_call_model(
model=model,
llm_router=llm_router,
models=team_object.models if team_object else [],
team_model_aliases=team_model_aliases,
object_type="team",
) )
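A rough caller-side sketch of the new helper (the import path is taken from the JWT handler change later in this diff): on a disallowed model the shared _can_object_call_model path raises a ProxyException whose type now reflects the team object.

from litellm.proxy._types import ProxyErrorTypes, ProxyException
from litellm.proxy.auth.auth_checks import can_team_access_model

async def team_can_use(model, team_object, llm_router) -> bool:
    # Hypothetical wrapper, not part of the diff.
    try:
        return await can_team_access_model(
            model=model,
            team_object=team_object,
            llm_router=llm_router,
            team_model_aliases=None,
        )
    except ProxyException as e:
        # object_type="team" maps the failure to team_model_access_denied
        assert e.type == ProxyErrorTypes.team_model_access_denied
        return False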
@ -1096,6 +1136,7 @@ async def can_user_call_model(
model=model, model=model,
llm_router=llm_router, llm_router=llm_router,
models=user_object.models, models=user_object.models,
object_type="user",
) )
@ -1248,53 +1289,6 @@ async def _team_max_budget_check(
) )
def _team_model_access_check(
model: Optional[str],
team_object: Optional[LiteLLM_TeamTable],
llm_router: Optional[Router],
team_model_aliases: Optional[Dict[str, str]] = None,
):
"""
Access check for team models
Raises:
Exception if the team is not allowed to call the`model`
"""
if (
model is not None
and team_object is not None
and team_object.models is not None
and len(team_object.models) > 0
and model not in team_object.models
):
# this means the team has access to all models on the proxy
if "all-proxy-models" in team_object.models or "*" in team_object.models:
# this means the team has access to all models on the proxy
pass
# check if the team model is an access_group
elif (
model_in_access_group(
model=model, team_models=team_object.models, llm_router=llm_router
)
is True
):
pass
elif model and "*" in model:
pass
elif _model_in_team_aliases(model=model, team_model_aliases=team_model_aliases):
pass
elif _model_matches_any_wildcard_pattern_in_list(
model=model, allowed_model_list=team_object.models
):
pass
else:
raise ProxyException(
message=f"Team not allowed to access model. Team={team_object.team_id}, Model={model}. Allowed team models = {team_object.models}",
type=ProxyErrorTypes.team_model_access_denied,
param="model",
code=status.HTTP_401_UNAUTHORIZED,
)
def is_model_allowed_by_pattern(model: str, allowed_model_pattern: str) -> bool: def is_model_allowed_by_pattern(model: str, allowed_model_pattern: str) -> bool:
""" """
Check if a model matches an allowed pattern. Check if a model matches an allowed pattern.

View file

@ -33,6 +33,7 @@ from litellm.proxy._types import (
ScopeMapping, ScopeMapping,
Span, Span,
) )
from litellm.proxy.auth.auth_checks import can_team_access_model
from litellm.proxy.utils import PrismaClient, ProxyLogging from litellm.proxy.utils import PrismaClient, ProxyLogging
from .auth_checks import ( from .auth_checks import (
@ -344,11 +345,16 @@ class JWTHandler:
if keys_url is None: if keys_url is None:
raise Exception("Missing JWT Public Key URL from environment.") raise Exception("Missing JWT Public Key URL from environment.")
cached_keys = await self.user_api_key_cache.async_get_cache( keys_url_list = [url.strip() for url in keys_url.split(",")]
"litellm_jwt_auth_keys"
) for key_url in keys_url_list:
cache_key = f"litellm_jwt_auth_keys_{key_url}"
cached_keys = await self.user_api_key_cache.async_get_cache(cache_key)
if cached_keys is None: if cached_keys is None:
response = await self.http_handler.get(keys_url) response = await self.http_handler.get(key_url)
response_json = response.json() response_json = response.json()
if "keys" in response_json: if "keys" in response_json:
@ -357,7 +363,7 @@ class JWTHandler:
keys = response_json keys = response_json
await self.user_api_key_cache.async_set_cache( await self.user_api_key_cache.async_set_cache(
key="litellm_jwt_auth_keys", key=cache_key,
value=keys, value=keys,
ttl=self.litellm_jwtauth.public_key_ttl, # cache for 10 mins ttl=self.litellm_jwtauth.public_key_ttl, # cache for 10 mins
) )
@ -365,12 +371,13 @@ class JWTHandler:
keys = cached_keys keys = cached_keys
public_key = self.parse_keys(keys=keys, kid=kid) public_key = self.parse_keys(keys=keys, kid=kid)
if public_key is None: if public_key is not None:
raise Exception(
f"No matching public key found. kid={kid}, keys_url={keys_url}, cached_keys={cached_keys}, len(keys)={len(keys)}"
)
return cast(dict, public_key) return cast(dict, public_key)
raise Exception(
f"No matching public key found. keys={keys_url_list}, kid={kid}"
)
def parse_keys(self, keys: JWKKeyValue, kid: Optional[str]) -> Optional[JWTKeyItem]: def parse_keys(self, keys: JWKKeyValue, kid: Optional[str]) -> Optional[JWTKeyItem]:
public_key: Optional[JWTKeyItem] = None public_key: Optional[JWTKeyItem] = None
if len(keys) == 1: if len(keys) == 1:
@ -723,8 +730,12 @@ class JWTAuthManager:
team_models = team_object.models team_models = team_object.models
if isinstance(team_models, list) and ( if isinstance(team_models, list) and (
not requested_model not requested_model
or requested_model in team_models or can_team_access_model(
or "*" in team_models model=requested_model,
team_object=team_object,
llm_router=None,
team_model_aliases=None,
)
): ):
is_allowed = allowed_routes_check( is_allowed = allowed_routes_check(
user_role=LitellmUserRoles.TEAM, user_role=LitellmUserRoles.TEAM,
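A distilled illustration of the new key-lookup behaviour: the configured JWKS URL setting may now hold a comma-separated list, and each URL gets its own cache entry (the cache-key format below comes straight from the hunk); the handler only raises once every URL has been tried.

# Standalone sketch of the per-URL cache-key scheme (example URLs are placeholders).
keys_url = "https://idp-a.example.com/jwks, https://idp-b.example.com/jwks"
keys_url_list = [url.strip() for url in keys_url.split(",")]
cache_keys = [f"litellm_jwt_auth_keys_{key_url}" for key_url in keys_url_list]
print(cache_keys)
# ['litellm_jwt_auth_keys_https://idp-a.example.com/jwks',
#  'litellm_jwt_auth_keys_https://idp-b.example.com/jwks']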

View file

@ -77,6 +77,11 @@ google_ai_studio_api_key_header = APIKeyHeader(
auto_error=False, auto_error=False,
description="If google ai studio client used.", description="If google ai studio client used.",
) )
azure_apim_header = APIKeyHeader(
name=SpecialHeaders.azure_apim_authorization.value,
auto_error=False,
description="The default name of the subscription key header of Azure",
)
def _get_bearer_token( def _get_bearer_token(
@ -301,6 +306,7 @@ async def _user_api_key_auth_builder( # noqa: PLR0915
azure_api_key_header: str, azure_api_key_header: str,
anthropic_api_key_header: Optional[str], anthropic_api_key_header: Optional[str],
google_ai_studio_api_key_header: Optional[str], google_ai_studio_api_key_header: Optional[str],
azure_apim_header: Optional[str],
request_data: dict, request_data: dict,
) -> UserAPIKeyAuth: ) -> UserAPIKeyAuth:
@ -344,6 +350,8 @@ async def _user_api_key_auth_builder( # noqa: PLR0915
api_key = anthropic_api_key_header api_key = anthropic_api_key_header
elif isinstance(google_ai_studio_api_key_header, str): elif isinstance(google_ai_studio_api_key_header, str):
api_key = google_ai_studio_api_key_header api_key = google_ai_studio_api_key_header
elif isinstance(azure_apim_header, str):
api_key = azure_apim_header
elif pass_through_endpoints is not None: elif pass_through_endpoints is not None:
for endpoint in pass_through_endpoints: for endpoint in pass_through_endpoints:
if endpoint.get("path", "") == route: if endpoint.get("path", "") == route:
@ -1165,6 +1173,7 @@ async def user_api_key_auth(
google_ai_studio_api_key_header: Optional[str] = fastapi.Security( google_ai_studio_api_key_header: Optional[str] = fastapi.Security(
google_ai_studio_api_key_header google_ai_studio_api_key_header
), ),
azure_apim_header: Optional[str] = fastapi.Security(azure_apim_header),
) -> UserAPIKeyAuth: ) -> UserAPIKeyAuth:
""" """
Parent function to authenticate user api key / jwt token. Parent function to authenticate user api key / jwt token.
@ -1178,6 +1187,7 @@ async def user_api_key_auth(
azure_api_key_header=azure_api_key_header, azure_api_key_header=azure_api_key_header,
anthropic_api_key_header=anthropic_api_key_header, anthropic_api_key_header=anthropic_api_key_header,
google_ai_studio_api_key_header=google_ai_studio_api_key_header, google_ai_studio_api_key_header=google_ai_studio_api_key_header,
azure_apim_header=azure_apim_header,
request_data=request_data, request_data=request_data,
) )
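A hedged client-side sketch of the new header: with azure_apim_header registered as an accepted security scheme, a request carrying only Azure API Management's default subscription-key header should authenticate against the proxy. The URL, key and model below are placeholders.

import httpx

resp = httpx.post(
    "http://localhost:4000/v1/chat/completions",
    headers={"Ocp-Apim-Subscription-Key": "sk-1234"},  # header name from SpecialHeaders.azure_apim_authorization
    json={"model": "gpt-4o", "messages": [{"role": "user", "content": "hi"}]},
)
print(resp.status_code)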

View file

@ -365,6 +365,8 @@ async def user_info(
and user_api_key_dict.user_role == LitellmUserRoles.PROXY_ADMIN and user_api_key_dict.user_role == LitellmUserRoles.PROXY_ADMIN
): ):
return await _get_user_info_for_proxy_admin() return await _get_user_info_for_proxy_admin()
elif user_id is None:
user_id = user_api_key_dict.user_id
## GET USER ROW ## ## GET USER ROW ##
if user_id is not None: if user_id is not None:
user_info = await prisma_client.get_data(user_id=user_id) user_info = await prisma_client.get_data(user_id=user_id)
@ -373,10 +375,6 @@ async def user_info(
## GET ALL TEAMS ## ## GET ALL TEAMS ##
team_list = [] team_list = []
team_id_list = [] team_id_list = []
# get all teams user belongs to
# teams_1 = await prisma_client.get_data(
# user_id=user_id, table_name="team", query_type="find_all"
# )
from litellm.proxy.management_endpoints.team_endpoints import list_team from litellm.proxy.management_endpoints.team_endpoints import list_team
teams_1 = await list_team( teams_1 = await list_team(

View file

@ -3,8 +3,8 @@ import asyncio
import json import json
from base64 import b64encode from base64 import b64encode
from datetime import datetime from datetime import datetime
from typing import List, Optional from typing import Dict, List, Optional, Union
from urllib.parse import urlparse from urllib.parse import parse_qs, urlencode, urlparse
import httpx import httpx
from fastapi import APIRouter, Depends, HTTPException, Request, Response, status from fastapi import APIRouter, Depends, HTTPException, Request, Response, status
@ -307,6 +307,21 @@ class HttpPassThroughEndpointHelpers:
return EndpointType.ANTHROPIC return EndpointType.ANTHROPIC
return EndpointType.GENERIC return EndpointType.GENERIC
@staticmethod
def get_merged_query_parameters(
existing_url: httpx.URL, request_query_params: Dict[str, Union[str, list]]
) -> Dict[str, Union[str, List[str]]]:
# Get the existing query params from the target URL
existing_query_string = existing_url.query.decode("utf-8")
existing_query_params = parse_qs(existing_query_string)
# parse_qs returns a dict where each value is a list, so let's flatten it
updated_existing_query_params = {
k: v[0] if len(v) == 1 else v for k, v in existing_query_params.items()
}
# Merge the query params, giving priority to the existing ones
return {**request_query_params, **updated_existing_query_params}
@staticmethod @staticmethod
async def _make_non_streaming_http_request( async def _make_non_streaming_http_request(
request: Request, request: Request,
@ -346,6 +361,7 @@ async def pass_through_request( # noqa: PLR0915
user_api_key_dict: UserAPIKeyAuth, user_api_key_dict: UserAPIKeyAuth,
custom_body: Optional[dict] = None, custom_body: Optional[dict] = None,
forward_headers: Optional[bool] = False, forward_headers: Optional[bool] = False,
merge_query_params: Optional[bool] = False,
query_params: Optional[dict] = None, query_params: Optional[dict] = None,
stream: Optional[bool] = None, stream: Optional[bool] = None,
): ):
@ -361,6 +377,18 @@ async def pass_through_request( # noqa: PLR0915
request=request, headers=headers, forward_headers=forward_headers request=request, headers=headers, forward_headers=forward_headers
) )
if merge_query_params:
# Create a new URL with the merged query params
url = url.copy_with(
query=urlencode(
HttpPassThroughEndpointHelpers.get_merged_query_parameters(
existing_url=url,
request_query_params=dict(request.query_params),
)
).encode("ascii")
)
endpoint_type: EndpointType = HttpPassThroughEndpointHelpers.get_endpoint_type( endpoint_type: EndpointType = HttpPassThroughEndpointHelpers.get_endpoint_type(
str(url) str(url)
) )
@ -657,6 +685,7 @@ def create_pass_through_route(
target: str, target: str,
custom_headers: Optional[dict] = None, custom_headers: Optional[dict] = None,
_forward_headers: Optional[bool] = False, _forward_headers: Optional[bool] = False,
_merge_query_params: Optional[bool] = False,
dependencies: Optional[List] = None, dependencies: Optional[List] = None,
): ):
# check if target is an adapter.py or a url # check if target is an adapter.py or a url
@ -703,6 +732,7 @@ def create_pass_through_route(
custom_headers=custom_headers or {}, custom_headers=custom_headers or {},
user_api_key_dict=user_api_key_dict, user_api_key_dict=user_api_key_dict,
forward_headers=_forward_headers, forward_headers=_forward_headers,
merge_query_params=_merge_query_params,
query_params=query_params, query_params=query_params,
stream=stream, stream=stream,
custom_body=custom_body, custom_body=custom_body,
@ -732,6 +762,7 @@ async def initialize_pass_through_endpoints(pass_through_endpoints: list):
custom_headers=_custom_headers custom_headers=_custom_headers
) )
_forward_headers = endpoint.get("forward_headers", None) _forward_headers = endpoint.get("forward_headers", None)
_merge_query_params = endpoint.get("merge_query_params", None)
_auth = endpoint.get("auth", None) _auth = endpoint.get("auth", None)
_dependencies = None _dependencies = None
if _auth is not None and str(_auth).lower() == "true": if _auth is not None and str(_auth).lower() == "true":
@ -753,7 +784,12 @@ async def initialize_pass_through_endpoints(pass_through_endpoints: list):
app.add_api_route( # type: ignore app.add_api_route( # type: ignore
path=_path, path=_path,
endpoint=create_pass_through_route( # type: ignore endpoint=create_pass_through_route( # type: ignore
_path, _target, _custom_headers, _forward_headers, _dependencies _path,
_target,
_custom_headers,
_forward_headers,
_merge_query_params,
_dependencies,
), ),
methods=["GET", "POST", "PUT", "DELETE", "PATCH"], methods=["GET", "POST", "PUT", "DELETE", "PATCH"],
dependencies=_dependencies, dependencies=_dependencies,
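A small worked example of the merge helper and its precedence rule (query params already on the target URL win over those from the incoming request); the import path is an assumption about where the pass-through helpers live.

import httpx
from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
    HttpPassThroughEndpointHelpers,
)

merged = HttpPassThroughEndpointHelpers.get_merged_query_parameters(
    existing_url=httpx.URL("https://generativelanguage.googleapis.com/v1beta/models?key=target-key"),
    request_query_params={"key": "request-key", "alt": "sse"},
)
print(merged)  # {'key': 'target-key', 'alt': 'sse'}  <- the target URL's value wins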

View file

@ -949,7 +949,9 @@ def _set_spend_logs_payload(
spend_logs_url: Optional[str] = None, spend_logs_url: Optional[str] = None,
): ):
verbose_proxy_logger.info( verbose_proxy_logger.info(
"Writing spend log to db - request_id: {}".format(payload.get("request_id")) "Writing spend log to db - request_id: {}, spend: {}".format(
payload.get("request_id"), payload.get("spend")
)
) )
if prisma_client is not None and spend_logs_url is not None: if prisma_client is not None and spend_logs_url is not None:
if isinstance(payload["startTime"], datetime): if isinstance(payload["startTime"], datetime):
@ -3759,6 +3761,7 @@ async def chat_completion( # noqa: PLR0915
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
type=getattr(e, "type", "None"), type=getattr(e, "type", "None"),
param=getattr(e, "param", "None"), param=getattr(e, "param", "None"),
openai_code=getattr(e, "code", None),
code=getattr(e, "status_code", 500), code=getattr(e, "status_code", 500),
headers=headers, headers=headers,
) )
@ -3972,6 +3975,7 @@ async def completion( # noqa: PLR0915
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
type=getattr(e, "type", "None"), type=getattr(e, "type", "None"),
param=getattr(e, "param", "None"), param=getattr(e, "param", "None"),
openai_code=getattr(e, "code", None),
code=getattr(e, "status_code", 500), code=getattr(e, "status_code", 500),
) )
@ -4181,6 +4185,7 @@ async def embeddings( # noqa: PLR0915
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
type=getattr(e, "type", "None"), type=getattr(e, "type", "None"),
param=getattr(e, "param", "None"), param=getattr(e, "param", "None"),
openai_code=getattr(e, "code", None),
code=getattr(e, "status_code", 500), code=getattr(e, "status_code", 500),
) )
@ -4300,6 +4305,7 @@ async def image_generation(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
type=getattr(e, "type", "None"), type=getattr(e, "type", "None"),
param=getattr(e, "param", "None"), param=getattr(e, "param", "None"),
openai_code=getattr(e, "code", None),
code=getattr(e, "status_code", 500), code=getattr(e, "status_code", 500),
) )
@ -4561,6 +4567,7 @@ async def audio_transcriptions(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
type=getattr(e, "type", "None"), type=getattr(e, "type", "None"),
param=getattr(e, "param", "None"), param=getattr(e, "param", "None"),
openai_code=getattr(e, "code", None),
code=getattr(e, "status_code", 500), code=getattr(e, "status_code", 500),
) )
@ -4710,6 +4717,7 @@ async def get_assistants(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
type=getattr(e, "type", "None"), type=getattr(e, "type", "None"),
param=getattr(e, "param", "None"), param=getattr(e, "param", "None"),
openai_code=getattr(e, "code", None),
code=getattr(e, "status_code", 500), code=getattr(e, "status_code", 500),
) )
@ -4808,7 +4816,7 @@ async def create_assistant(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
type=getattr(e, "type", "None"), type=getattr(e, "type", "None"),
param=getattr(e, "param", "None"), param=getattr(e, "param", "None"),
code=getattr(e, "status_code", 500), code=getattr(e, "code", getattr(e, "status_code", 500)),
) )
@ -4905,7 +4913,7 @@ async def delete_assistant(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
type=getattr(e, "type", "None"), type=getattr(e, "type", "None"),
param=getattr(e, "param", "None"), param=getattr(e, "param", "None"),
code=getattr(e, "status_code", 500), code=getattr(e, "code", getattr(e, "status_code", 500)),
) )
@ -5002,7 +5010,7 @@ async def create_threads(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
type=getattr(e, "type", "None"), type=getattr(e, "type", "None"),
param=getattr(e, "param", "None"), param=getattr(e, "param", "None"),
code=getattr(e, "status_code", 500), code=getattr(e, "code", getattr(e, "status_code", 500)),
) )
@ -5098,7 +5106,7 @@ async def get_thread(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
type=getattr(e, "type", "None"), type=getattr(e, "type", "None"),
param=getattr(e, "param", "None"), param=getattr(e, "param", "None"),
code=getattr(e, "status_code", 500), code=getattr(e, "code", getattr(e, "status_code", 500)),
) )
@ -5197,7 +5205,7 @@ async def add_messages(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
type=getattr(e, "type", "None"), type=getattr(e, "type", "None"),
param=getattr(e, "param", "None"), param=getattr(e, "param", "None"),
code=getattr(e, "status_code", 500), code=getattr(e, "code", getattr(e, "status_code", 500)),
) )
@ -5292,7 +5300,7 @@ async def get_messages(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
type=getattr(e, "type", "None"), type=getattr(e, "type", "None"),
param=getattr(e, "param", "None"), param=getattr(e, "param", "None"),
code=getattr(e, "status_code", 500), code=getattr(e, "code", getattr(e, "status_code", 500)),
) )
@ -5401,7 +5409,7 @@ async def run_thread(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
type=getattr(e, "type", "None"), type=getattr(e, "type", "None"),
param=getattr(e, "param", "None"), param=getattr(e, "param", "None"),
code=getattr(e, "status_code", 500), code=getattr(e, "code", getattr(e, "status_code", 500)),
) )
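Illustrative of the openai_code wiring above (assuming ProxyException is imported from litellm.proxy._types): openai_code carries the OpenAI-style string code when the upstream error provides one, and falls back to the numeric status code otherwise.

from litellm.proxy._types import ProxyException

e = ProxyException(
    message="Invalid model",
    type="invalid_request_error",
    param="model",
    code=400,                       # maps to the HTTP status code
    openai_code="model_not_found",  # maps to 'code' in the OpenAI error body
)
print(e.openai_code)  # model_not_found

e2 = ProxyException(message="boom", type="internal_server_error", param=None, code=500)
print(e2.openai_code)  # falls back to `code` when openai_code is not passed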

217
litellm/responses/main.py Normal file
View file

@ -0,0 +1,217 @@
import asyncio
import contextvars
from functools import partial
from typing import Any, Dict, Iterable, List, Literal, Optional, Union
import httpx
import litellm
from litellm.constants import request_timeout
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
from litellm.responses.utils import ResponsesAPIRequestUtils
from litellm.types.llms.openai import (
Reasoning,
ResponseIncludable,
ResponseInputParam,
ResponsesAPIOptionalRequestParams,
ResponsesAPIResponse,
ResponseTextConfigParam,
ToolChoice,
ToolParam,
)
from litellm.types.router import GenericLiteLLMParams
from litellm.utils import ProviderConfigManager, client
from .streaming_iterator import BaseResponsesAPIStreamingIterator
####### ENVIRONMENT VARIABLES ###################
# Initialize any necessary instances or variables here
base_llm_http_handler = BaseLLMHTTPHandler()
#################################################
@client
async def aresponses(
input: Union[str, ResponseInputParam],
model: str,
include: Optional[List[ResponseIncludable]] = None,
instructions: Optional[str] = None,
max_output_tokens: Optional[int] = None,
metadata: Optional[Dict[str, Any]] = None,
parallel_tool_calls: Optional[bool] = None,
previous_response_id: Optional[str] = None,
reasoning: Optional[Reasoning] = None,
store: Optional[bool] = None,
stream: Optional[bool] = None,
temperature: Optional[float] = None,
text: Optional[ResponseTextConfigParam] = None,
tool_choice: Optional[ToolChoice] = None,
tools: Optional[Iterable[ToolParam]] = None,
top_p: Optional[float] = None,
truncation: Optional[Literal["auto", "disabled"]] = None,
user: Optional[str] = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Optional[Dict[str, Any]] = None,
extra_query: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
**kwargs,
) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
"""
Async: Handles responses API requests by reusing the synchronous function
"""
try:
loop = asyncio.get_event_loop()
kwargs["aresponses"] = True
func = partial(
responses,
input=input,
model=model,
include=include,
instructions=instructions,
max_output_tokens=max_output_tokens,
metadata=metadata,
parallel_tool_calls=parallel_tool_calls,
previous_response_id=previous_response_id,
reasoning=reasoning,
store=store,
stream=stream,
temperature=temperature,
text=text,
tool_choice=tool_choice,
tools=tools,
top_p=top_p,
truncation=truncation,
user=user,
extra_headers=extra_headers,
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
**kwargs,
)
ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func)
init_response = await loop.run_in_executor(None, func_with_context)
if asyncio.iscoroutine(init_response):
response = await init_response
else:
response = init_response
return response
except Exception as e:
raise e
@client
def responses(
input: Union[str, ResponseInputParam],
model: str,
include: Optional[List[ResponseIncludable]] = None,
instructions: Optional[str] = None,
max_output_tokens: Optional[int] = None,
metadata: Optional[Dict[str, Any]] = None,
parallel_tool_calls: Optional[bool] = None,
previous_response_id: Optional[str] = None,
reasoning: Optional[Reasoning] = None,
store: Optional[bool] = None,
stream: Optional[bool] = None,
temperature: Optional[float] = None,
text: Optional[ResponseTextConfigParam] = None,
tool_choice: Optional[ToolChoice] = None,
tools: Optional[Iterable[ToolParam]] = None,
top_p: Optional[float] = None,
truncation: Optional[Literal["auto", "disabled"]] = None,
user: Optional[str] = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Optional[Dict[str, Any]] = None,
extra_query: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
**kwargs,
):
"""
Synchronous version of the Responses API.
Uses the synchronous HTTP handler to make requests.
"""
litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj") # type: ignore
litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
_is_async = kwargs.pop("aresponses", False) is True
# get llm provider logic
litellm_params = GenericLiteLLMParams(**kwargs)
model, custom_llm_provider, dynamic_api_key, dynamic_api_base = (
litellm.get_llm_provider(
model=model,
custom_llm_provider=kwargs.get("custom_llm_provider", None),
api_base=litellm_params.api_base,
api_key=litellm_params.api_key,
)
)
# get provider config
responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
ProviderConfigManager.get_provider_responses_api_config(
model=model,
provider=litellm.LlmProviders(custom_llm_provider),
)
)
if responses_api_provider_config is None:
raise litellm.BadRequestError(
model=model,
llm_provider=custom_llm_provider,
message=f"Responses API not available for custom_llm_provider={custom_llm_provider}, model: {model}",
)
# Get all parameters using locals() and combine with kwargs
local_vars = locals()
local_vars.update(kwargs)
# Get ResponsesAPIOptionalRequestParams with only valid parameters
response_api_optional_params: ResponsesAPIOptionalRequestParams = (
ResponsesAPIRequestUtils.get_requested_response_api_optional_param(local_vars)
)
# Get optional parameters for the responses API
responses_api_request_params: Dict = (
ResponsesAPIRequestUtils.get_optional_params_responses_api(
model=model,
responses_api_provider_config=responses_api_provider_config,
response_api_optional_params=response_api_optional_params,
)
)
# Pre Call logging
litellm_logging_obj.update_environment_variables(
model=model,
user=user,
optional_params=dict(responses_api_request_params),
litellm_params={
"litellm_call_id": litellm_call_id,
**responses_api_request_params,
},
custom_llm_provider=custom_llm_provider,
)
# Call the handler with _is_async flag instead of directly calling the async handler
response = base_llm_http_handler.response_api_handler(
model=model,
input=input,
responses_api_provider_config=responses_api_provider_config,
response_api_optional_request_params=responses_api_request_params,
custom_llm_provider=custom_llm_provider,
litellm_params=litellm_params,
logging_obj=litellm_logging_obj,
extra_headers=extra_headers,
extra_body=extra_body,
timeout=timeout or request_timeout,
_is_async=_is_async,
client=kwargs.get("client"),
)
return response
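A minimal usage sketch of the new entry points, assuming they are re-exported at the package top level as litellm.responses / litellm.aresponses; the model name is a placeholder and an OpenAI API key is expected in the environment.

import asyncio
import litellm

# Synchronous call
resp = litellm.responses(
    model="openai/gpt-4o",
    input="Write a one-line haiku about proxies.",
    max_output_tokens=100,
)
print(resp)

# Async variant
async def main():
    print(await litellm.aresponses(model="openai/gpt-4o", input="Say hi."))

asyncio.run(main())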

View file

@ -0,0 +1,209 @@
import asyncio
import json
from datetime import datetime
from typing import Optional
import httpx
from litellm.constants import STREAM_SSE_DONE_STRING
from litellm.litellm_core_utils.asyncify import run_async_function
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.litellm_core_utils.thread_pool_executor import executor
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.types.llms.openai import (
ResponsesAPIStreamEvents,
ResponsesAPIStreamingResponse,
)
from litellm.utils import CustomStreamWrapper
class BaseResponsesAPIStreamingIterator:
"""
Base class for streaming iterators that process responses from the Responses API.
This class contains shared logic for both synchronous and asynchronous iterators.
"""
def __init__(
self,
response: httpx.Response,
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
logging_obj: LiteLLMLoggingObj,
):
self.response = response
self.model = model
self.logging_obj = logging_obj
self.finished = False
self.responses_api_provider_config = responses_api_provider_config
self.completed_response: Optional[ResponsesAPIStreamingResponse] = None
self.start_time = datetime.now()
def _process_chunk(self, chunk):
"""Process a single chunk of data from the stream"""
if not chunk:
return None
# Handle SSE format (data: {...})
chunk = CustomStreamWrapper._strip_sse_data_from_chunk(chunk)
if chunk is None:
return None
# Handle "[DONE]" marker
if chunk == STREAM_SSE_DONE_STRING:
self.finished = True
return None
try:
# Parse the JSON chunk
parsed_chunk = json.loads(chunk)
# Format as ResponsesAPIStreamingResponse
if isinstance(parsed_chunk, dict):
openai_responses_api_chunk = (
self.responses_api_provider_config.transform_streaming_response(
model=self.model,
parsed_chunk=parsed_chunk,
logging_obj=self.logging_obj,
)
)
# Store the completed response
if (
openai_responses_api_chunk
and openai_responses_api_chunk.type
== ResponsesAPIStreamEvents.RESPONSE_COMPLETED
):
self.completed_response = openai_responses_api_chunk
self._handle_logging_completed_response()
return openai_responses_api_chunk
return None
except json.JSONDecodeError:
# If we can't parse the chunk, continue
return None
def _handle_logging_completed_response(self):
"""Base implementation - should be overridden by subclasses"""
pass
class ResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
"""
Async iterator for processing streaming responses from the Responses API.
"""
def __init__(
self,
response: httpx.Response,
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
logging_obj: LiteLLMLoggingObj,
):
super().__init__(response, model, responses_api_provider_config, logging_obj)
self.stream_iterator = response.aiter_lines()
def __aiter__(self):
return self
async def __anext__(self) -> ResponsesAPIStreamingResponse:
try:
while True:
# Get the next chunk from the stream
try:
chunk = await self.stream_iterator.__anext__()
except StopAsyncIteration:
self.finished = True
raise StopAsyncIteration
result = self._process_chunk(chunk)
if self.finished:
raise StopAsyncIteration
elif result is not None:
return result
# If result is None, continue the loop to get the next chunk
except httpx.HTTPError as e:
# Handle HTTP errors
self.finished = True
raise e
def _handle_logging_completed_response(self):
"""Handle logging for completed responses in async context"""
asyncio.create_task(
self.logging_obj.async_success_handler(
result=self.completed_response,
start_time=self.start_time,
end_time=datetime.now(),
cache_hit=None,
)
)
executor.submit(
self.logging_obj.success_handler,
result=self.completed_response,
cache_hit=None,
start_time=self.start_time,
end_time=datetime.now(),
)
class SyncResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
"""
Synchronous iterator for processing streaming responses from the Responses API.
"""
def __init__(
self,
response: httpx.Response,
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
logging_obj: LiteLLMLoggingObj,
):
super().__init__(response, model, responses_api_provider_config, logging_obj)
self.stream_iterator = response.iter_lines()
def __iter__(self):
return self
def __next__(self):
try:
while True:
# Get the next chunk from the stream
try:
chunk = next(self.stream_iterator)
except StopIteration:
self.finished = True
raise StopIteration
result = self._process_chunk(chunk)
if self.finished:
raise StopIteration
elif result is not None:
return result
# If result is None, continue the loop to get the next chunk
except httpx.HTTPError as e:
# Handle HTTP errors
self.finished = True
raise e
def _handle_logging_completed_response(self):
"""Handle logging for completed responses in sync context"""
run_async_function(
async_function=self.logging_obj.async_success_handler,
result=self.completed_response,
start_time=self.start_time,
end_time=datetime.now(),
cache_hit=None,
)
executor.submit(
self.logging_obj.success_handler,
result=self.completed_response,
cache_hit=None,
start_time=self.start_time,
end_time=datetime.now(),
)
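How the async iterator is typically consumed (a sketch, assuming stream=True makes aresponses return a ResponsesAPIStreamingIterator): each yielded item is one of the typed stream events, and iteration ends after the [DONE] marker.

import asyncio
import litellm

async def stream_demo():
    stream = await litellm.aresponses(
        model="openai/gpt-4o",  # placeholder model
        input="Stream three short words.",
        stream=True,
    )
    async for event in stream:
        # event.type is a ResponsesAPIStreamEvents value; the str enum compares to plain strings
        if event.type == "response.output_text.delta":
            print(event.delta, end="")
        elif event.type == "response.completed":
            print("\n-- completed --")

asyncio.run(stream_demo())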

View file

@ -0,0 +1,97 @@
from typing import Any, Dict, cast, get_type_hints
import litellm
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.types.llms.openai import (
ResponseAPIUsage,
ResponsesAPIOptionalRequestParams,
)
from litellm.types.utils import Usage
class ResponsesAPIRequestUtils:
"""Helper utils for constructing ResponseAPI requests"""
@staticmethod
def get_optional_params_responses_api(
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
response_api_optional_params: ResponsesAPIOptionalRequestParams,
) -> Dict:
"""
Get optional parameters for the responses API.
Args:
params: Dictionary of all parameters
model: The model name
responses_api_provider_config: The provider configuration for responses API
Returns:
A dictionary of supported parameters for the responses API
"""
# Remove None values and internal parameters
# Get supported parameters for the model
supported_params = responses_api_provider_config.get_supported_openai_params(
model
)
# Check for unsupported parameters
unsupported_params = [
param
for param in response_api_optional_params
if param not in supported_params
]
if unsupported_params:
raise litellm.UnsupportedParamsError(
model=model,
message=f"The following parameters are not supported for model {model}: {', '.join(unsupported_params)}",
)
# Map parameters to provider-specific format
mapped_params = responses_api_provider_config.map_openai_params(
response_api_optional_params=response_api_optional_params,
model=model,
drop_params=litellm.drop_params,
)
return mapped_params
@staticmethod
def get_requested_response_api_optional_param(
params: Dict[str, Any]
) -> ResponsesAPIOptionalRequestParams:
"""
Filter parameters to only include those defined in ResponsesAPIOptionalRequestParams.
Args:
params: Dictionary of parameters to filter
Returns:
ResponsesAPIOptionalRequestParams instance with only the valid parameters
"""
valid_keys = get_type_hints(ResponsesAPIOptionalRequestParams).keys()
filtered_params = {k: v for k, v in params.items() if k in valid_keys}
return cast(ResponsesAPIOptionalRequestParams, filtered_params)
class ResponseAPILoggingUtils:
@staticmethod
def _is_response_api_usage(usage: dict) -> bool:
"""returns True if usage is from OpenAI Response API"""
if "input_tokens" in usage and "output_tokens" in usage:
return True
return False
@staticmethod
def _transform_response_api_usage_to_chat_usage(usage: dict) -> Usage:
"""Tranforms the ResponseAPIUsage object to a Usage object"""
response_api_usage: ResponseAPIUsage = ResponseAPIUsage(**usage)
prompt_tokens: int = response_api_usage.input_tokens or 0
completion_tokens: int = response_api_usage.output_tokens or 0
return Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=prompt_tokens + completion_tokens,
)
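A worked example of the usage transform, following the arithmetic above (total = input + output); the raw dict mirrors the Responses API usage shape, with the required fields filled in.

from litellm.responses.utils import ResponseAPILoggingUtils

raw_usage = {
    "input_tokens": 120,
    "output_tokens": 45,
    "output_tokens_details": None,
    "total_tokens": 165,
}
assert ResponseAPILoggingUtils._is_response_api_usage(raw_usage) is True

usage = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(raw_usage)
print(usage.prompt_tokens, usage.completion_tokens, usage.total_tokens)  # 120 45 165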

View file

@ -30,6 +30,8 @@ def get_azure_ad_token_provider() -> Callable[[], str]:
client_secret=os.environ["AZURE_CLIENT_SECRET"], client_secret=os.environ["AZURE_CLIENT_SECRET"],
tenant_id=os.environ["AZURE_TENANT_ID"], tenant_id=os.environ["AZURE_TENANT_ID"],
) )
elif cred == "ManagedIdentityCredential":
credential = cred_cls(client_id=os.environ["AZURE_CLIENT_ID"])
else: else:
credential = cred_cls() credential = cred_cls()

View file

@ -365,6 +365,63 @@ class AmazonStability3TextToImageResponse(TypedDict, total=False):
finish_reasons: List[str] finish_reasons: List[str]
class AmazonNovaCanvasRequestBase(TypedDict, total=False):
"""
Base class for Amazon Nova Canvas API requests
"""
pass
class AmazonNovaCanvasImageGenerationConfig(TypedDict, total=False):
"""
Config for Amazon Nova Canvas Text to Image API
Ref: https://docs.aws.amazon.com/nova/latest/userguide/image-gen-req-resp-structure.html
"""
cfgScale: int
seed: int
quality: Literal["standard", "premium"]
width: int
height: int
numberOfImages: int
class AmazonNovaCanvasTextToImageParams(TypedDict, total=False):
"""
Params for Amazon Nova Canvas Text to Image API
"""
text: str
negativeText: str
controlStrength: float
controlMode: Literal["CANNY_EDIT", "SEGMENTATION"]
conditionImage: str
class AmazonNovaCanvasTextToImageRequest(AmazonNovaCanvasRequestBase, TypedDict, total=False):
"""
Request for Amazon Nova Canvas Text to Image API
Ref: https://docs.aws.amazon.com/nova/latest/userguide/image-gen-req-resp-structure.html
"""
textToImageParams: AmazonNovaCanvasTextToImageParams
taskType: Literal["TEXT_IMAGE"]
imageGenerationConfig: AmazonNovaCanvasImageGenerationConfig
class AmazonNovaCanvasTextToImageResponse(TypedDict, total=False):
"""
Response for Amazon Nova Canvas Text to Image API
Ref: https://docs.aws.amazon.com/nova/latest/userguide/image-gen-req-resp-structure.html
"""
images: List[str]
if TYPE_CHECKING: if TYPE_CHECKING:
from botocore.awsrequest import AWSPreparedRequest from botocore.awsrequest import AWSPreparedRequest
else: else:

View file

@ -1,6 +1,8 @@
from enum import Enum
from os import PathLike from os import PathLike
from typing import IO, Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union from typing import IO, Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union
import httpx
from openai._legacy_response import ( from openai._legacy_response import (
HttpxBinaryResponseContent as _HttpxBinaryResponseContent, HttpxBinaryResponseContent as _HttpxBinaryResponseContent,
) )
@ -31,8 +33,24 @@ from openai.types.chat.chat_completion_prediction_content_param import (
) )
from openai.types.embedding import Embedding as OpenAIEmbedding from openai.types.embedding import Embedding as OpenAIEmbedding
from openai.types.fine_tuning.fine_tuning_job import FineTuningJob from openai.types.fine_tuning.fine_tuning_job import FineTuningJob
from pydantic import BaseModel, Field from openai.types.responses.response import (
from typing_extensions import Dict, Required, TypedDict, override IncompleteDetails,
Response,
ResponseOutputItem,
ResponseTextConfig,
Tool,
ToolChoice,
)
from openai.types.responses.response_create_params import (
Reasoning,
ResponseIncludable,
ResponseInputParam,
ResponseTextConfigParam,
ToolChoice,
ToolParam,
)
from pydantic import BaseModel, Discriminator, Field, PrivateAttr
from typing_extensions import Annotated, Dict, Required, TypedDict, override
FileContent = Union[IO[bytes], bytes, PathLike] FileContent = Union[IO[bytes], bytes, PathLike]
@ -684,3 +702,323 @@ OpenAIAudioTranscriptionOptionalParams = Literal[
OpenAIImageVariationOptionalParams = Literal["n", "size", "response_format", "user"] OpenAIImageVariationOptionalParams = Literal["n", "size", "response_format", "user"]
class ResponsesAPIOptionalRequestParams(TypedDict, total=False):
"""TypedDict for Optional parameters supported by the responses API."""
include: Optional[List[ResponseIncludable]]
instructions: Optional[str]
max_output_tokens: Optional[int]
metadata: Optional[Dict[str, Any]]
parallel_tool_calls: Optional[bool]
previous_response_id: Optional[str]
reasoning: Optional[Reasoning]
store: Optional[bool]
stream: Optional[bool]
temperature: Optional[float]
text: Optional[ResponseTextConfigParam]
tool_choice: Optional[ToolChoice]
tools: Optional[Iterable[ToolParam]]
top_p: Optional[float]
truncation: Optional[Literal["auto", "disabled"]]
user: Optional[str]
class ResponsesAPIRequestParams(ResponsesAPIOptionalRequestParams, total=False):
"""TypedDict for request parameters supported by the responses API."""
input: Union[str, ResponseInputParam]
model: str
class BaseLiteLLMOpenAIResponseObject(BaseModel):
def __getitem__(self, key):
return self.__dict__[key]
def get(self, key, default=None):
return self.__dict__.get(key, default)
def __contains__(self, key):
return key in self.__dict__
class OutputTokensDetails(BaseLiteLLMOpenAIResponseObject):
reasoning_tokens: int
model_config = {"extra": "allow"}
class ResponseAPIUsage(BaseLiteLLMOpenAIResponseObject):
input_tokens: int
"""The number of input tokens."""
output_tokens: int
"""The number of output tokens."""
output_tokens_details: Optional[OutputTokensDetails]
"""A detailed breakdown of the output tokens."""
total_tokens: int
"""The total number of tokens used."""
model_config = {"extra": "allow"}
class ResponsesAPIResponse(BaseLiteLLMOpenAIResponseObject):
id: str
created_at: float
error: Optional[dict]
incomplete_details: Optional[IncompleteDetails]
instructions: Optional[str]
metadata: Optional[Dict]
model: Optional[str]
object: Optional[str]
output: List[ResponseOutputItem]
parallel_tool_calls: bool
temperature: Optional[float]
tool_choice: ToolChoice
tools: List[Tool]
top_p: Optional[float]
max_output_tokens: Optional[int]
previous_response_id: Optional[str]
reasoning: Optional[Reasoning]
status: Optional[str]
text: Optional[ResponseTextConfig]
truncation: Optional[Literal["auto", "disabled"]]
usage: Optional[ResponseAPIUsage]
user: Optional[str]
# Define private attributes using PrivateAttr
_hidden_params: dict = PrivateAttr(default_factory=dict)
class ResponsesAPIStreamEvents(str, Enum):
"""
Enum representing all supported OpenAI stream event types for the Responses API.
Inherits from str to allow direct string comparison and usage as dictionary keys.
"""
# Response lifecycle events
RESPONSE_CREATED = "response.created"
RESPONSE_IN_PROGRESS = "response.in_progress"
RESPONSE_COMPLETED = "response.completed"
RESPONSE_FAILED = "response.failed"
RESPONSE_INCOMPLETE = "response.incomplete"
# Output item events
OUTPUT_ITEM_ADDED = "response.output_item.added"
OUTPUT_ITEM_DONE = "response.output_item.done"
# Content part events
CONTENT_PART_ADDED = "response.content_part.added"
CONTENT_PART_DONE = "response.content_part.done"
# Output text events
OUTPUT_TEXT_DELTA = "response.output_text.delta"
OUTPUT_TEXT_ANNOTATION_ADDED = "response.output_text.annotation.added"
OUTPUT_TEXT_DONE = "response.output_text.done"
# Refusal events
REFUSAL_DELTA = "response.refusal.delta"
REFUSAL_DONE = "response.refusal.done"
# Function call events
FUNCTION_CALL_ARGUMENTS_DELTA = "response.function_call_arguments.delta"
FUNCTION_CALL_ARGUMENTS_DONE = "response.function_call_arguments.done"
# File search events
FILE_SEARCH_CALL_IN_PROGRESS = "response.file_search_call.in_progress"
FILE_SEARCH_CALL_SEARCHING = "response.file_search_call.searching"
FILE_SEARCH_CALL_COMPLETED = "response.file_search_call.completed"
# Web search events
WEB_SEARCH_CALL_IN_PROGRESS = "response.web_search_call.in_progress"
WEB_SEARCH_CALL_SEARCHING = "response.web_search_call.searching"
WEB_SEARCH_CALL_COMPLETED = "response.web_search_call.completed"
# Error event
ERROR = "error"
class ResponseCreatedEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.RESPONSE_CREATED]
response: ResponsesAPIResponse
class ResponseInProgressEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.RESPONSE_IN_PROGRESS]
response: ResponsesAPIResponse
class ResponseCompletedEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.RESPONSE_COMPLETED]
response: ResponsesAPIResponse
_hidden_params: dict = PrivateAttr(default_factory=dict)
class ResponseFailedEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.RESPONSE_FAILED]
response: ResponsesAPIResponse
class ResponseIncompleteEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.RESPONSE_INCOMPLETE]
response: ResponsesAPIResponse
class OutputItemAddedEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED]
output_index: int
item: dict
class OutputItemDoneEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.OUTPUT_ITEM_DONE]
output_index: int
item: dict
class ContentPartAddedEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.CONTENT_PART_ADDED]
item_id: str
output_index: int
content_index: int
part: dict
class ContentPartDoneEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.CONTENT_PART_DONE]
item_id: str
output_index: int
content_index: int
part: dict
class OutputTextDeltaEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA]
item_id: str
output_index: int
content_index: int
delta: str
class OutputTextAnnotationAddedEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_ANNOTATION_ADDED]
item_id: str
output_index: int
content_index: int
annotation_index: int
annotation: dict
class OutputTextDoneEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.OUTPUT_TEXT_DONE]
item_id: str
output_index: int
content_index: int
text: str
class RefusalDeltaEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.REFUSAL_DELTA]
item_id: str
output_index: int
content_index: int
delta: str
class RefusalDoneEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.REFUSAL_DONE]
item_id: str
output_index: int
content_index: int
refusal: str
class FunctionCallArgumentsDeltaEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA]
item_id: str
output_index: int
delta: str
class FunctionCallArgumentsDoneEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DONE]
item_id: str
output_index: int
arguments: str
class FileSearchCallInProgressEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_IN_PROGRESS]
output_index: int
item_id: str
class FileSearchCallSearchingEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_SEARCHING]
output_index: int
item_id: str
class FileSearchCallCompletedEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.FILE_SEARCH_CALL_COMPLETED]
output_index: int
item_id: str
class WebSearchCallInProgressEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_IN_PROGRESS]
output_index: int
item_id: str
class WebSearchCallSearchingEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_SEARCHING]
output_index: int
item_id: str
class WebSearchCallCompletedEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.WEB_SEARCH_CALL_COMPLETED]
output_index: int
item_id: str
class ErrorEvent(BaseLiteLLMOpenAIResponseObject):
type: Literal[ResponsesAPIStreamEvents.ERROR]
code: Optional[str]
message: str
param: Optional[str]
# Union type for all possible streaming responses
ResponsesAPIStreamingResponse = Annotated[
Union[
ResponseCreatedEvent,
ResponseInProgressEvent,
ResponseCompletedEvent,
ResponseFailedEvent,
ResponseIncompleteEvent,
OutputItemAddedEvent,
OutputItemDoneEvent,
ContentPartAddedEvent,
ContentPartDoneEvent,
OutputTextDeltaEvent,
OutputTextAnnotationAddedEvent,
OutputTextDoneEvent,
RefusalDeltaEvent,
RefusalDoneEvent,
FunctionCallArgumentsDeltaEvent,
FunctionCallArgumentsDoneEvent,
FileSearchCallInProgressEvent,
FileSearchCallSearchingEvent,
FileSearchCallCompletedEvent,
WebSearchCallInProgressEvent,
WebSearchCallSearchingEvent,
WebSearchCallCompletedEvent,
ErrorEvent,
],
Discriminator("type"),
]
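Because ResponsesAPIStreamEvents subclasses str, handlers can compare members directly against the literal event strings sent on the wire; a tiny check:

from litellm.types.llms.openai import ResponsesAPIStreamEvents

assert ResponsesAPIStreamEvents.RESPONSE_COMPLETED == "response.completed"
assert ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA == "response.output_text.delta"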

View file

@ -227,6 +227,8 @@ class CallTypes(Enum):
list_fine_tuning_jobs = "list_fine_tuning_jobs" list_fine_tuning_jobs = "list_fine_tuning_jobs"
aretrieve_fine_tuning_job = "aretrieve_fine_tuning_job" aretrieve_fine_tuning_job = "aretrieve_fine_tuning_job"
retrieve_fine_tuning_job = "retrieve_fine_tuning_job" retrieve_fine_tuning_job = "retrieve_fine_tuning_job"
responses = "responses"
aresponses = "aresponses"
CallTypesLiteral = Literal[ CallTypesLiteral = Literal[

View file

@ -211,6 +211,7 @@ from litellm.llms.base_llm.image_variations.transformation import (
BaseImageVariationConfig, BaseImageVariationConfig,
) )
from litellm.llms.base_llm.rerank.transformation import BaseRerankConfig from litellm.llms.base_llm.rerank.transformation import BaseRerankConfig
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from ._logging import _is_debugging_on, verbose_logger from ._logging import _is_debugging_on, verbose_logger
from .caching.caching import ( from .caching.caching import (
@ -729,6 +730,11 @@ def function_setup( # noqa: PLR0915
call_type == CallTypes.aspeech.value or call_type == CallTypes.speech.value call_type == CallTypes.aspeech.value or call_type == CallTypes.speech.value
): ):
messages = kwargs.get("input", "speech") messages = kwargs.get("input", "speech")
elif (
call_type == CallTypes.aresponses.value
or call_type == CallTypes.responses.value
):
messages = args[0] if len(args) > 0 else kwargs["input"]
else: else:
messages = "default-message-value" messages = "default-message-value"
stream = True if "stream" in kwargs and kwargs["stream"] is True else False stream = True if "stream" in kwargs and kwargs["stream"] is True else False
@ -2445,6 +2451,7 @@ def get_optional_params_image_gen(
config_class = ( config_class = (
litellm.AmazonStability3Config litellm.AmazonStability3Config
if litellm.AmazonStability3Config._is_stability_3_model(model=model) if litellm.AmazonStability3Config._is_stability_3_model(model=model)
else litellm.AmazonNovaCanvasConfig if litellm.AmazonNovaCanvasConfig._is_nova_model(model=model)
else litellm.AmazonStabilityConfig else litellm.AmazonStabilityConfig
) )
supported_params = config_class.get_supported_openai_params(model=model) supported_params = config_class.get_supported_openai_params(model=model)
@ -5121,7 +5128,7 @@ def prompt_token_calculator(model, messages):
from anthropic import AI_PROMPT, HUMAN_PROMPT, Anthropic from anthropic import AI_PROMPT, HUMAN_PROMPT, Anthropic
anthropic_obj = Anthropic() anthropic_obj = Anthropic()
num_tokens = anthropic_obj.count_tokens(text) num_tokens = anthropic_obj.count_tokens(text) # type: ignore
else: else:
num_tokens = len(encoding.encode(text)) num_tokens = len(encoding.encode(text))
return num_tokens return num_tokens
@ -6293,6 +6300,15 @@ class ProviderConfigManager:
return litellm.DeepgramAudioTranscriptionConfig() return litellm.DeepgramAudioTranscriptionConfig()
return None return None
@staticmethod
def get_provider_responses_api_config(
model: str,
provider: LlmProviders,
) -> Optional[BaseResponsesAPIConfig]:
if litellm.LlmProviders.OPENAI == provider:
return litellm.OpenAIResponsesAPIConfig()
return None
@staticmethod @staticmethod
def get_provider_text_completion_config( def get_provider_text_completion_config(
model: str, model: str,
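A lookup sketch for the new provider mapping (ProviderConfigManager is imported from litellm.utils in the new responses module): OpenAI resolves to OpenAIResponsesAPIConfig and every other provider currently falls through to None.

import litellm
from litellm.utils import ProviderConfigManager

config = ProviderConfigManager.get_provider_responses_api_config(
    model="gpt-4o", provider=litellm.LlmProviders.OPENAI
)
print(type(config).__name__)  # OpenAIResponsesAPIConfig

config = ProviderConfigManager.get_provider_responses_api_config(
    model="claude-3-5-sonnet", provider=litellm.LlmProviders.ANTHROPIC
)
print(config)  # None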

View file

@ -6,7 +6,7 @@
"input_cost_per_token": 0.0000, "input_cost_per_token": 0.0000,
"output_cost_per_token": 0.000, "output_cost_per_token": 0.000,
"litellm_provider": "one of https://docs.litellm.ai/docs/providers", "litellm_provider": "one of https://docs.litellm.ai/docs/providers",
"mode": "one of chat, embedding, completion, image_generation, audio_transcription, audio_speech", "mode": "one of: chat, embedding, completion, image_generation, audio_transcription, audio_speech, image_generation, moderation, rerank",
"supports_function_calling": true, "supports_function_calling": true,
"supports_parallel_function_calling": true, "supports_parallel_function_calling": true,
"supports_vision": true, "supports_vision": true,
@ -931,7 +931,7 @@
"input_cost_per_token": 0.000000, "input_cost_per_token": 0.000000,
"output_cost_per_token": 0.000000, "output_cost_per_token": 0.000000,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "moderations" "mode": "moderation"
}, },
"text-moderation-007": { "text-moderation-007": {
"max_tokens": 32768, "max_tokens": 32768,
@ -940,7 +940,7 @@
"input_cost_per_token": 0.000000, "input_cost_per_token": 0.000000,
"output_cost_per_token": 0.000000, "output_cost_per_token": 0.000000,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "moderations" "mode": "moderation"
}, },
"text-moderation-latest": { "text-moderation-latest": {
"max_tokens": 32768, "max_tokens": 32768,
@@ -949,7 +949,7 @@
"input_cost_per_token": 0.000000, "input_cost_per_token": 0.000000,
"output_cost_per_token": 0.000000, "output_cost_per_token": 0.000000,
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "moderations" "mode": "moderation"
}, },
"256-x-256/dall-e-2": { "256-x-256/dall-e-2": {
"mode": "image_generation", "mode": "image_generation",
@@ -1625,13 +1625,23 @@
"max_tokens": 8192, "max_tokens": 8192,
"max_input_tokens": 128000, "max_input_tokens": 128000,
"max_output_tokens": 8192, "max_output_tokens": 8192,
"input_cost_per_token": 0.0, "input_cost_per_token": 0.00000135,
"input_cost_per_token_cache_hit": 0.0, "output_cost_per_token": 0.0000054,
"output_cost_per_token": 0.0,
"litellm_provider": "azure_ai", "litellm_provider": "azure_ai",
"mode": "chat", "mode": "chat",
"supports_prompt_caching": true, "supports_tool_choice": true,
"supports_tool_choice": true "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/deepseek-r1-improved-performance-higher-limits-and-transparent-pricing/4386367"
},
"azure_ai/deepseek-v3": {
"max_tokens": 8192,
"max_input_tokens": 128000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.00000114,
"output_cost_per_token": 0.00000456,
"litellm_provider": "azure_ai",
"mode": "chat",
"supports_tool_choice": true,
"source": "https://techcommunity.microsoft.com/blog/machinelearningblog/announcing-deepseek-v3-on-azure-ai-foundry-and-github/4390438"
}, },
"azure_ai/jamba-instruct": { "azure_ai/jamba-instruct": {
"max_tokens": 4096, "max_tokens": 4096,
@@ -1643,6 +1653,17 @@
"mode": "chat", "mode": "chat",
"supports_tool_choice": true "supports_tool_choice": true
}, },
"azure_ai/mistral-nemo": {
"max_tokens": 4096,
"max_input_tokens": 131072,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000015,
"litellm_provider": "azure_ai",
"mode": "chat",
"supports_function_calling": true,
"source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-nemo-12b-2407?tab=PlansAndPrice"
},
"azure_ai/mistral-large": { "azure_ai/mistral-large": {
"max_tokens": 8191, "max_tokens": 8191,
"max_input_tokens": 32000, "max_input_tokens": 32000,
@@ -1770,10 +1791,34 @@
"source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice", "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice",
"supports_tool_choice": true "supports_tool_choice": true
}, },
"azure_ai/Phi-4": { "azure_ai/Phi-4-mini-instruct": {
"max_tokens": 4096, "max_tokens": 4096,
"max_input_tokens": 128000, "max_input_tokens": 131072,
"max_output_tokens": 4096, "max_output_tokens": 4096,
"input_cost_per_token": 0,
"output_cost_per_token": 0,
"litellm_provider": "azure_ai",
"mode": "chat",
"supports_function_calling": true,
"source": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/models-featured#microsoft"
},
"azure_ai/Phi-4-multimodal-instruct": {
"max_tokens": 4096,
"max_input_tokens": 131072,
"max_output_tokens": 4096,
"input_cost_per_token": 0,
"output_cost_per_token": 0,
"litellm_provider": "azure_ai",
"mode": "chat",
"supports_audio_input": true,
"supports_function_calling": true,
"supports_vision": true,
"source": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/models-featured#microsoft"
},
"azure_ai/Phi-4": {
"max_tokens": 16384,
"max_input_tokens": 16384,
"max_output_tokens": 16384,
"input_cost_per_token": 0.000000125, "input_cost_per_token": 0.000000125,
"output_cost_per_token": 0.0000005, "output_cost_per_token": 0.0000005,
"litellm_provider": "azure_ai", "litellm_provider": "azure_ai",
@@ -3892,31 +3937,6 @@
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
"supports_tool_choice": true "supports_tool_choice": true
}, },
"gemini/gemini-2.0-flash": {
"max_tokens": 8192,
"max_input_tokens": 1048576,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_audio_token": 0.0000007,
"input_cost_per_token": 0.0000001,
"output_cost_per_token": 0.0000004,
"litellm_provider": "gemini",
"mode": "chat",
"rpm": 10000,
"tpm": 10000000,
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": true,
"supports_tool_choice": true,
"source": "https://ai.google.dev/pricing#2_0flash"
},
"gemini-2.0-flash-001": { "gemini-2.0-flash-001": {
"max_tokens": 8192, "max_tokens": 8192,
"max_input_tokens": 1048576, "max_input_tokens": 1048576,
@@ -4008,6 +4028,69 @@
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
"supports_tool_choice": true "supports_tool_choice": true
}, },
"gemini/gemini-2.0-pro-exp-02-05": {
"max_tokens": 8192,
"max_input_tokens": 2097152,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0,
"input_cost_per_video_per_second": 0,
"input_cost_per_audio_per_second": 0,
"input_cost_per_token": 0,
"input_cost_per_character": 0,
"input_cost_per_token_above_128k_tokens": 0,
"input_cost_per_character_above_128k_tokens": 0,
"input_cost_per_image_above_128k_tokens": 0,
"input_cost_per_video_per_second_above_128k_tokens": 0,
"input_cost_per_audio_per_second_above_128k_tokens": 0,
"output_cost_per_token": 0,
"output_cost_per_character": 0,
"output_cost_per_token_above_128k_tokens": 0,
"output_cost_per_character_above_128k_tokens": 0,
"litellm_provider": "gemini",
"mode": "chat",
"rpm": 2,
"tpm": 1000000,
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_audio_input": true,
"supports_video_input": true,
"supports_pdf_input": true,
"supports_response_schema": true,
"supports_tool_choice": true,
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"gemini/gemini-2.0-flash": {
"max_tokens": 8192,
"max_input_tokens": 1048576,
"max_output_tokens": 8192,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_audio_token": 0.0000007,
"input_cost_per_token": 0.0000001,
"output_cost_per_token": 0.0000004,
"litellm_provider": "gemini",
"mode": "chat",
"rpm": 10000,
"tpm": 10000000,
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": true,
"supports_tool_choice": true,
"source": "https://ai.google.dev/pricing#2_0flash"
},
"gemini/gemini-2.0-flash-001": { "gemini/gemini-2.0-flash-001": {
"max_tokens": 8192, "max_tokens": 8192,
"max_input_tokens": 1048576, "max_input_tokens": 1048576,
@@ -4511,6 +4594,12 @@
"mode": "image_generation", "mode": "image_generation",
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing" "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
}, },
"vertex_ai/imagen-3.0-generate-002": {
"output_cost_per_image": 0.04,
"litellm_provider": "vertex_ai-image-models",
"mode": "image_generation",
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"vertex_ai/imagen-3.0-generate-001": { "vertex_ai/imagen-3.0-generate-001": {
"output_cost_per_image": 0.04, "output_cost_per_image": 0.04,
"litellm_provider": "vertex_ai-image-models", "litellm_provider": "vertex_ai-image-models",
@@ -6547,6 +6636,12 @@
"supports_prompt_caching": true, "supports_prompt_caching": true,
"supports_response_schema": true "supports_response_schema": true
}, },
"1024-x-1024/50-steps/bedrock/amazon.nova-canvas-v1:0": {
"max_input_tokens": 2600,
"output_cost_per_image": 0.06,
"litellm_provider": "bedrock",
"mode": "image_generation"
},
"eu.amazon.nova-pro-v1:0": { "eu.amazon.nova-pro-v1:0": {
"max_tokens": 4096, "max_tokens": 4096,
"max_input_tokens": 300000, "max_input_tokens": 300000,
@@ -7477,6 +7572,18 @@
"litellm_provider": "bedrock", "litellm_provider": "bedrock",
"mode": "embedding" "mode": "embedding"
}, },
"us.deepseek.r1-v1:0": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.00000135,
"output_cost_per_token": 0.0000054,
"litellm_provider": "bedrock_converse",
"mode": "chat",
"supports_function_calling": false,
"supports_tool_choice": false
},
"meta.llama3-3-70b-instruct-v1:0": { "meta.llama3-3-70b-instruct-v1:0": {
"max_tokens": 4096, "max_tokens": 4096,
"max_input_tokens": 128000, "max_input_tokens": 128000,

View file

@@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "litellm" name = "litellm"
version = "1.63.5" version = "1.63.7"
description = "Library to easily interface with LLM API providers" description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"] authors = ["BerriAI"]
license = "MIT" license = "MIT"
@@ -96,7 +96,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api" build-backend = "poetry.core.masonry.api"
[tool.commitizen] [tool.commitizen]
version = "1.63.5" version = "1.63.7"
version_files = [ version_files = [
"pyproject.toml:^version" "pyproject.toml:^version"
] ]

View file

@@ -1,7 +1,7 @@
# LITELLM PROXY DEPENDENCIES # # LITELLM PROXY DEPENDENCIES #
anyio==4.4.0 # openai + http req. anyio==4.4.0 # openai + http req.
httpx==0.27.0 # Pin Httpx dependency httpx==0.27.0 # Pin Httpx dependency
openai==1.61.0 # openai req. openai==1.66.1 # openai req.
fastapi==0.115.5 # server dep fastapi==0.115.5 # server dep
backoff==2.2.1 # server dep backoff==2.2.1 # server dep
pyyaml==6.0.2 # server dep pyyaml==6.0.2 # server dep

View file

@@ -59,15 +59,15 @@ class BaseImageGenTest(ABC):
await asyncio.sleep(1) await asyncio.sleep(1)
assert response._hidden_params["response_cost"] is not None # assert response._hidden_params["response_cost"] is not None
assert response._hidden_params["response_cost"] > 0 # assert response._hidden_params["response_cost"] > 0
print("response_cost", response._hidden_params["response_cost"]) # print("response_cost", response._hidden_params["response_cost"])
logged_standard_logging_payload = custom_logger.standard_logging_payload logged_standard_logging_payload = custom_logger.standard_logging_payload
print("logged_standard_logging_payload", logged_standard_logging_payload) print("logged_standard_logging_payload", logged_standard_logging_payload)
assert logged_standard_logging_payload is not None assert logged_standard_logging_payload is not None
assert logged_standard_logging_payload["response_cost"] is not None # assert logged_standard_logging_payload["response_cost"] is not None
assert logged_standard_logging_payload["response_cost"] > 0 # assert logged_standard_logging_payload["response_cost"] > 0
from openai.types.images_response import ImagesResponse from openai.types.images_response import ImagesResponse

View file

@@ -130,6 +130,19 @@ class TestBedrockSd1(BaseImageGenTest):
return {"model": "bedrock/stability.sd3-large-v1:0"} return {"model": "bedrock/stability.sd3-large-v1:0"}
class TestBedrockNovaCanvasTextToImage(BaseImageGenTest):
def get_base_image_generation_call_args(self) -> dict:
litellm.in_memory_llm_clients_cache = InMemoryCache()
return {
"model": "bedrock/amazon.nova-canvas-v1:0",
"n": 1,
"size": "320x320",
"imageGenerationConfig": {"cfgScale": 6.5, "seed": 12},
"taskType": "TEXT_IMAGE",
"aws_region_name": "us-east-1",
}
class TestOpenAIDalle3(BaseImageGenTest): class TestOpenAIDalle3(BaseImageGenTest):
def get_base_image_generation_call_args(self) -> dict: def get_base_image_generation_call_args(self) -> dict:
return {"model": "dall-e-3"} return {"model": "dall-e-3"}

View file

@@ -0,0 +1,239 @@
import json
import os
import sys
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
sys.path.insert(
0, os.path.abspath("../../../../..")
) # Adds the parent directory to the system path
from litellm.llms.openai.responses.transformation import OpenAIResponsesAPIConfig
from litellm.types.llms.openai import (
OutputTextDeltaEvent,
ResponseCompletedEvent,
ResponsesAPIRequestParams,
ResponsesAPIResponse,
ResponsesAPIStreamEvents,
)
class TestOpenAIResponsesAPIConfig:
def setup_method(self):
self.config = OpenAIResponsesAPIConfig()
self.model = "gpt-4o"
self.logging_obj = MagicMock()
def test_map_openai_params(self):
"""Test that parameters are correctly mapped"""
test_params = {"input": "Hello world", "temperature": 0.7, "stream": True}
result = self.config.map_openai_params(
response_api_optional_params=test_params,
model=self.model,
drop_params=False,
)
# The function should return the params unchanged
assert result == test_params
def validate_responses_api_request_params(self, params, expected_fields):
"""
Validate that the params dict has the expected structure of ResponsesAPIRequestParams
Args:
params: The dict to validate
expected_fields: Dict of field names and their expected values
"""
# Check that it's a dict
assert isinstance(params, dict), "Result should be a dict"
# Check expected fields have correct values
for field, value in expected_fields.items():
assert field in params, f"Missing expected field: {field}"
assert (
params[field] == value
), f"Field {field} has value {params[field]}, expected {value}"
def test_transform_responses_api_request(self):
"""Test request transformation"""
input_text = "What is the capital of France?"
optional_params = {"temperature": 0.7, "stream": True}
result = self.config.transform_responses_api_request(
model=self.model,
input=input_text,
response_api_optional_request_params=optional_params,
litellm_params={},
headers={},
)
# Validate the result has the expected structure and values
expected_fields = {
"model": self.model,
"input": input_text,
"temperature": 0.7,
"stream": True,
}
self.validate_responses_api_request_params(result, expected_fields)
def test_transform_streaming_response(self):
"""Test streaming response transformation"""
# Test with a text delta event
chunk = {
"type": "response.output_text.delta",
"item_id": "item_123",
"output_index": 0,
"content_index": 0,
"delta": "Hello",
}
result = self.config.transform_streaming_response(
model=self.model, parsed_chunk=chunk, logging_obj=self.logging_obj
)
assert isinstance(result, OutputTextDeltaEvent)
assert result.type == ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA
assert result.delta == "Hello"
assert result.item_id == "item_123"
# Test with a completed event - providing all required fields
completed_chunk = {
"type": "response.completed",
"response": {
"id": "resp_123",
"created_at": 1234567890,
"model": "gpt-4o",
"object": "response",
"output": [],
"parallel_tool_calls": False,
"error": None,
"incomplete_details": None,
"instructions": None,
"metadata": None,
"temperature": 0.7,
"tool_choice": "auto",
"tools": [],
"top_p": 1.0,
"max_output_tokens": None,
"previous_response_id": None,
"reasoning": None,
"status": "completed",
"text": None,
"truncation": "auto",
"usage": None,
"user": None,
},
}
# Mock the get_event_model_class to avoid validation issues in tests
with patch.object(
OpenAIResponsesAPIConfig, "get_event_model_class"
) as mock_get_class:
mock_get_class.return_value = ResponseCompletedEvent
result = self.config.transform_streaming_response(
model=self.model,
parsed_chunk=completed_chunk,
logging_obj=self.logging_obj,
)
assert result.type == ResponsesAPIStreamEvents.RESPONSE_COMPLETED
assert result.response.id == "resp_123"
def test_validate_environment(self):
"""Test that validate_environment correctly sets the Authorization header"""
# Test with provided API key
headers = {}
api_key = "test_api_key"
result = self.config.validate_environment(
headers=headers, model=self.model, api_key=api_key
)
assert "Authorization" in result
assert result["Authorization"] == f"Bearer {api_key}"
# Test with empty headers
headers = {}
with patch("litellm.api_key", "litellm_api_key"):
result = self.config.validate_environment(headers=headers, model=self.model)
assert "Authorization" in result
assert result["Authorization"] == "Bearer litellm_api_key"
# Test with existing headers
headers = {"Content-Type": "application/json"}
with patch("litellm.openai_key", "openai_key"):
with patch("litellm.api_key", None):
result = self.config.validate_environment(
headers=headers, model=self.model
)
assert "Authorization" in result
assert result["Authorization"] == "Bearer openai_key"
assert "Content-Type" in result
assert result["Content-Type"] == "application/json"
# Test with environment variable
headers = {}
with patch("litellm.api_key", None):
with patch("litellm.openai_key", None):
with patch(
"litellm.llms.openai.responses.transformation.get_secret_str",
return_value="env_api_key",
):
result = self.config.validate_environment(
headers=headers, model=self.model
)
assert "Authorization" in result
assert result["Authorization"] == "Bearer env_api_key"
def test_get_complete_url(self):
"""Test that get_complete_url returns the correct URL"""
# Test with provided API base
api_base = "https://custom-openai.example.com/v1"
result = self.config.get_complete_url(api_base=api_base, model=self.model)
assert result == "https://custom-openai.example.com/v1/responses"
# Test with litellm.api_base
with patch("litellm.api_base", "https://litellm-api-base.example.com/v1"):
result = self.config.get_complete_url(api_base=None, model=self.model)
assert result == "https://litellm-api-base.example.com/v1/responses"
# Test with environment variable
with patch("litellm.api_base", None):
with patch(
"litellm.llms.openai.responses.transformation.get_secret_str",
return_value="https://env-api-base.example.com/v1",
):
result = self.config.get_complete_url(api_base=None, model=self.model)
assert result == "https://env-api-base.example.com/v1/responses"
# Test with default API base
with patch("litellm.api_base", None):
with patch(
"litellm.llms.openai.responses.transformation.get_secret_str",
return_value=None,
):
result = self.config.get_complete_url(api_base=None, model=self.model)
assert result == "https://api.openai.com/v1/responses"
# Test with trailing slash in API base
api_base = "https://custom-openai.example.com/v1/"
result = self.config.get_complete_url(api_base=api_base, model=self.model)
assert result == "https://custom-openai.example.com/v1/responses"

View file

@@ -0,0 +1,150 @@
import json
import os
import sys
import pytest
from fastapi.testclient import TestClient
sys.path.insert(
0, os.path.abspath("../../..")
) # Adds the parent directory to the system path
import litellm
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.llms.openai.responses.transformation import OpenAIResponsesAPIConfig
from litellm.responses.utils import ResponseAPILoggingUtils, ResponsesAPIRequestUtils
from litellm.types.llms.openai import ResponsesAPIOptionalRequestParams
from litellm.types.utils import Usage
class TestResponsesAPIRequestUtils:
def test_get_optional_params_responses_api(self):
"""Test that optional parameters are correctly processed for responses API"""
# Setup
model = "gpt-4o"
config = OpenAIResponsesAPIConfig()
optional_params = ResponsesAPIOptionalRequestParams(
{"temperature": 0.7, "max_output_tokens": 100}
)
# Execute
result = ResponsesAPIRequestUtils.get_optional_params_responses_api(
model=model,
responses_api_provider_config=config,
response_api_optional_params=optional_params,
)
# Assert
assert result == optional_params
assert "temperature" in result
assert result["temperature"] == 0.7
assert "max_output_tokens" in result
assert result["max_output_tokens"] == 100
def test_get_optional_params_responses_api_unsupported_param(self):
"""Test that unsupported parameters raise an error"""
# Setup
model = "gpt-4o"
config = OpenAIResponsesAPIConfig()
optional_params = ResponsesAPIOptionalRequestParams(
{"temperature": 0.7, "unsupported_param": "value"}
)
# Execute and Assert
with pytest.raises(litellm.UnsupportedParamsError) as excinfo:
ResponsesAPIRequestUtils.get_optional_params_responses_api(
model=model,
responses_api_provider_config=config,
response_api_optional_params=optional_params,
)
assert "unsupported_param" in str(excinfo.value)
assert model in str(excinfo.value)
def test_get_requested_response_api_optional_param(self):
"""Test filtering parameters to only include those in ResponsesAPIOptionalRequestParams"""
# Setup
params = {
"temperature": 0.7,
"max_output_tokens": 100,
"invalid_param": "value",
"model": "gpt-4o", # This is not in ResponsesAPIOptionalRequestParams
}
# Execute
result = ResponsesAPIRequestUtils.get_requested_response_api_optional_param(
params
)
# Assert
assert "temperature" in result
assert "max_output_tokens" in result
assert "invalid_param" not in result
assert "model" not in result
assert result["temperature"] == 0.7
assert result["max_output_tokens"] == 100
class TestResponseAPILoggingUtils:
def test_is_response_api_usage_true(self):
"""Test identification of Response API usage format"""
# Setup
usage = {"input_tokens": 10, "output_tokens": 20}
# Execute
result = ResponseAPILoggingUtils._is_response_api_usage(usage)
# Assert
assert result is True
def test_is_response_api_usage_false(self):
"""Test identification of non-Response API usage format"""
# Setup
usage = {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30}
# Execute
result = ResponseAPILoggingUtils._is_response_api_usage(usage)
# Assert
assert result is False
def test_transform_response_api_usage_to_chat_usage(self):
"""Test transformation from Response API usage to Chat usage format"""
# Setup
usage = {
"input_tokens": 10,
"output_tokens": 20,
"total_tokens": 30,
"output_tokens_details": {"reasoning_tokens": 5},
}
# Execute
result = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
usage
)
# Assert
assert isinstance(result, Usage)
assert result.prompt_tokens == 10
assert result.completion_tokens == 20
assert result.total_tokens == 30
def test_transform_response_api_usage_with_none_values(self):
"""Test transformation handles None values properly"""
# Setup
usage = {
"input_tokens": 0, # Changed from None to 0
"output_tokens": 20,
"total_tokens": 20,
"output_tokens_details": {"reasoning_tokens": 5},
}
# Execute
result = ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
usage
)
# Assert
assert result.prompt_tokens == 0
assert result.completion_tokens == 20
assert result.total_tokens == 20

View file

@@ -0,0 +1,108 @@
import json
from jsonschema import validate
def test_model_prices_and_context_window_json_is_valid():
'''
Validates the `model_prices_and_context_window.json` file.
If this test fails after you update the json, you need to update the schema or correct the change you made.
'''
INTENDED_SCHEMA = {
"type": "object",
"additionalProperties": {
"type": "object",
"properties": {
"cache_creation_input_audio_token_cost": {"type": "number"},
"cache_creation_input_token_cost": {"type": "number"},
"cache_read_input_token_cost": {"type": "number"},
"deprecation_date": {"type": "string"},
"input_cost_per_audio_per_second": {"type": "number"},
"input_cost_per_audio_per_second_above_128k_tokens": {"type": "number"},
"input_cost_per_audio_token": {"type": "number"},
"input_cost_per_character": {"type": "number"},
"input_cost_per_character_above_128k_tokens": {"type": "number"},
"input_cost_per_image": {"type": "number"},
"input_cost_per_image_above_128k_tokens": {"type": "number"},
"input_cost_per_pixel": {"type": "number"},
"input_cost_per_query": {"type": "number"},
"input_cost_per_request": {"type": "number"},
"input_cost_per_second": {"type": "number"},
"input_cost_per_token": {"type": "number"},
"input_cost_per_token_above_128k_tokens": {"type": "number"},
"input_cost_per_token_batch_requests": {"type": "number"},
"input_cost_per_token_batches": {"type": "number"},
"input_cost_per_token_cache_hit": {"type": "number"},
"input_cost_per_video_per_second": {"type": "number"},
"input_cost_per_video_per_second_above_128k_tokens": {"type": "number"},
"input_dbu_cost_per_token": {"type": "number"},
"litellm_provider": {"type": "string"},
"max_audio_length_hours": {"type": "number"},
"max_audio_per_prompt": {"type": "number"},
"max_document_chunks_per_query": {"type": "number"},
"max_images_per_prompt": {"type": "number"},
"max_input_tokens": {"type": "number"},
"max_output_tokens": {"type": "number"},
"max_pdf_size_mb": {"type": "number"},
"max_query_tokens": {"type": "number"},
"max_tokens": {"type": "number"},
"max_tokens_per_document_chunk": {"type": "number"},
"max_video_length": {"type": "number"},
"max_videos_per_prompt": {"type": "number"},
"metadata": {"type": "object"},
"mode": {
"type": "string",
"enum": [
"audio_speech",
"audio_transcription",
"chat",
"completion",
"embedding",
"image_generation",
"moderation",
"rerank"
],
},
"output_cost_per_audio_token": {"type": "number"},
"output_cost_per_character": {"type": "number"},
"output_cost_per_character_above_128k_tokens": {"type": "number"},
"output_cost_per_image": {"type": "number"},
"output_cost_per_pixel": {"type": "number"},
"output_cost_per_second": {"type": "number"},
"output_cost_per_token": {"type": "number"},
"output_cost_per_token_above_128k_tokens": {"type": "number"},
"output_cost_per_token_batches": {"type": "number"},
"output_db_cost_per_token": {"type": "number"},
"output_dbu_cost_per_token": {"type": "number"},
"output_vector_size": {"type": "number"},
"rpd": {"type": "number"},
"rpm": {"type": "number"},
"source": {"type": "string"},
"supports_assistant_prefill": {"type": "boolean"},
"supports_audio_input": {"type": "boolean"},
"supports_audio_output": {"type": "boolean"},
"supports_embedding_image_input": {"type": "boolean"},
"supports_function_calling": {"type": "boolean"},
"supports_image_input": {"type": "boolean"},
"supports_parallel_function_calling": {"type": "boolean"},
"supports_pdf_input": {"type": "boolean"},
"supports_prompt_caching": {"type": "boolean"},
"supports_response_schema": {"type": "boolean"},
"supports_system_messages": {"type": "boolean"},
"supports_tool_choice": {"type": "boolean"},
"supports_video_input": {"type": "boolean"},
"supports_vision": {"type": "boolean"},
"tool_use_system_prompt_tokens": {"type": "number"},
"tpm": {"type": "number"},
},
"additionalProperties": False,
},
}
with open("./model_prices_and_context_window.json", "r") as model_prices_file:
actual_json = json.load(model_prices_file)
assert isinstance(actual_json, dict)
actual_json.pop('sample_spec', None) # remove the sample, whose schema is inconsistent with the real data
validate(actual_json, INTENDED_SCHEMA)

View file

@@ -0,0 +1,63 @@
# conftest.py
import importlib
import os
import sys
import pytest
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import litellm
@pytest.fixture(scope="function", autouse=True)
def setup_and_teardown():
"""
This fixture reloads litellm before every test function, to speed up testing by preventing callbacks from being chained across tests.
"""
curr_dir = os.getcwd() # Get the current working directory
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the project directory to the system path
import litellm
from litellm import Router
importlib.reload(litellm)
try:
if hasattr(litellm, "proxy") and hasattr(litellm.proxy, "proxy_server"):
import litellm.proxy.proxy_server
importlib.reload(litellm.proxy.proxy_server)
except Exception as e:
print(f"Error reloading litellm.proxy.proxy_server: {e}")
import asyncio
loop = asyncio.get_event_loop_policy().new_event_loop()
asyncio.set_event_loop(loop)
print(litellm)
# from litellm import Router, completion, aembedding, acompletion, embedding
yield
# Teardown code (executes after the yield point)
loop.close() # Close the loop created earlier
asyncio.set_event_loop(None) # Remove the reference to the loop
def pytest_collection_modifyitems(config, items):
# Separate tests in 'test_amazing_proxy_custom_logger.py' and other tests
custom_logger_tests = [
item for item in items if "custom_logger" in item.parent.name
]
other_tests = [item for item in items if "custom_logger" not in item.parent.name]
# Sort tests based on their names
custom_logger_tests.sort(key=lambda x: x.name)
other_tests.sort(key=lambda x: x.name)
# Reorder the items list
items[:] = custom_logger_tests + other_tests

View file

@@ -0,0 +1,505 @@
import os
import sys
import pytest
import asyncio
from typing import Optional
sys.path.insert(0, os.path.abspath("../.."))
import litellm
from litellm.integrations.custom_logger import CustomLogger
import json
from litellm.types.utils import StandardLoggingPayload
from litellm.types.llms.openai import (
ResponseCompletedEvent,
ResponsesAPIResponse,
ResponseTextConfig,
ResponseAPIUsage,
IncompleteDetails,
)
def validate_responses_api_response(response, final_chunk: bool = False):
"""
Validate that a response from litellm.responses() or litellm.aresponses()
conforms to the expected ResponsesAPIResponse structure.
Args:
response: The response object to validate
Raises:
AssertionError: If the response doesn't match the expected structure
"""
# Validate response structure
print("response=", json.dumps(response, indent=4, default=str))
assert isinstance(
response, ResponsesAPIResponse
), "Response should be an instance of ResponsesAPIResponse"
# Required fields
assert "id" in response and isinstance(
response["id"], str
), "Response should have a string 'id' field"
assert "created_at" in response and isinstance(
response["created_at"], (int, float)
), "Response should have a numeric 'created_at' field"
assert "output" in response and isinstance(
response["output"], list
), "Response should have a list 'output' field"
assert "parallel_tool_calls" in response and isinstance(
response["parallel_tool_calls"], bool
), "Response should have a boolean 'parallel_tool_calls' field"
# Optional fields with their expected types
optional_fields = {
"error": (dict, type(None)), # error can be dict or None
"incomplete_details": (IncompleteDetails, type(None)),
"instructions": (str, type(None)),
"metadata": dict,
"model": str,
"object": str,
"temperature": (int, float),
"tool_choice": (dict, str),
"tools": list,
"top_p": (int, float),
"max_output_tokens": (int, type(None)),
"previous_response_id": (str, type(None)),
"reasoning": dict,
"status": str,
"text": ResponseTextConfig,
"truncation": str,
"usage": ResponseAPIUsage,
"user": (str, type(None)),
}
if final_chunk is False:
optional_fields["usage"] = type(None)
for field, expected_type in optional_fields.items():
if field in response:
assert isinstance(
response[field], expected_type
), f"Field '{field}' should be of type {expected_type}, but got {type(response[field])}"
# Check if output has at least one item
if final_chunk is True:
assert (
len(response["output"]) > 0
), "Response 'output' field should have at least one item"
return True # Return True if validation passes
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_basic_openai_responses_api(sync_mode):
litellm._turn_on_debug()
if sync_mode:
response = litellm.responses(
model="gpt-4o", input="Basic ping", max_output_tokens=20
)
else:
response = await litellm.aresponses(
model="gpt-4o", input="Basic ping", max_output_tokens=20
)
print("litellm response=", json.dumps(response, indent=4, default=str))
# Use the helper function to validate the response
validate_responses_api_response(response, final_chunk=True)
@pytest.mark.parametrize("sync_mode", [True])
@pytest.mark.asyncio
async def test_basic_openai_responses_api_streaming(sync_mode):
litellm._turn_on_debug()
if sync_mode:
response = litellm.responses(
model="gpt-4o",
input="Basic ping",
stream=True,
)
for event in response:
print("litellm response=", json.dumps(event, indent=4, default=str))
else:
response = await litellm.aresponses(
model="gpt-4o",
input="Basic ping",
stream=True,
)
async for event in response:
print("litellm response=", json.dumps(event, indent=4, default=str))
class TestCustomLogger(CustomLogger):
def __init__(
self,
):
self.standard_logging_object: Optional[StandardLoggingPayload] = None
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
print("in async_log_success_event")
print("kwargs=", json.dumps(kwargs, indent=4, default=str))
self.standard_logging_object = kwargs["standard_logging_object"]
pass
def validate_standard_logging_payload(
slp: StandardLoggingPayload, response: ResponsesAPIResponse, request_model: str
):
"""
Validate that a StandardLoggingPayload object matches the expected response
Args:
slp (StandardLoggingPayload): The standard logging payload object to validate
response (dict): The litellm response to compare against
request_model (str): The model name that was requested
"""
# Validate payload exists
assert slp is not None, "Standard logging payload should not be None"
# Validate token counts
print("response=", json.dumps(response, indent=4, default=str))
assert (
slp["prompt_tokens"] == response["usage"]["input_tokens"]
), "Prompt tokens mismatch"
assert (
slp["completion_tokens"] == response["usage"]["output_tokens"]
), "Completion tokens mismatch"
assert (
slp["total_tokens"]
== response["usage"]["input_tokens"] + response["usage"]["output_tokens"]
), "Total tokens mismatch"
# Validate spend and response metadata
assert slp["response_cost"] > 0, "Response cost should be greater than 0"
assert slp["id"] == response["id"], "Response ID mismatch"
assert slp["model"] == request_model, "Model name mismatch"
# Validate messages
assert slp["messages"] == [{"content": "hi", "role": "user"}], "Messages mismatch"
# Validate complete response structure
validate_responses_match(slp["response"], response)
@pytest.mark.asyncio
async def test_basic_openai_responses_api_streaming_with_logging():
litellm._turn_on_debug()
litellm.set_verbose = True
test_custom_logger = TestCustomLogger()
litellm.callbacks = [test_custom_logger]
request_model = "gpt-4o"
response = await litellm.aresponses(
model=request_model,
input="hi",
stream=True,
)
final_response: Optional[ResponseCompletedEvent] = None
async for event in response:
if event.type == "response.completed":
final_response = event
print("litellm response=", json.dumps(event, indent=4, default=str))
print("sleeping for 2 seconds...")
await asyncio.sleep(2)
print(
"standard logging payload=",
json.dumps(test_custom_logger.standard_logging_object, indent=4, default=str),
)
assert final_response is not None
assert test_custom_logger.standard_logging_object is not None
validate_standard_logging_payload(
slp=test_custom_logger.standard_logging_object,
response=final_response.response,
request_model=request_model,
)
def validate_responses_match(slp_response, litellm_response):
"""Validate that the standard logging payload OpenAI response matches the litellm response"""
# Validate core fields
assert slp_response["id"] == litellm_response["id"], "ID mismatch"
assert slp_response["model"] == litellm_response["model"], "Model mismatch"
assert (
slp_response["created_at"] == litellm_response["created_at"]
), "Created at mismatch"
# Validate usage
assert (
slp_response["usage"]["input_tokens"]
== litellm_response["usage"]["input_tokens"]
), "Input tokens mismatch"
assert (
slp_response["usage"]["output_tokens"]
== litellm_response["usage"]["output_tokens"]
), "Output tokens mismatch"
assert (
slp_response["usage"]["total_tokens"]
== litellm_response["usage"]["total_tokens"]
), "Total tokens mismatch"
# Validate output/messages
assert len(slp_response["output"]) == len(
litellm_response["output"]
), "Output length mismatch"
for slp_msg, litellm_msg in zip(slp_response["output"], litellm_response["output"]):
assert slp_msg["role"] == litellm_msg.role, "Message role mismatch"
# Access the content's text field for the litellm response
litellm_content = litellm_msg.content[0].text if litellm_msg.content else ""
assert (
slp_msg["content"][0]["text"] == litellm_content
), f"Message content mismatch. Expected {litellm_content}, Got {slp_msg['content']}"
assert slp_msg["status"] == litellm_msg.status, "Message status mismatch"
@pytest.mark.asyncio
async def test_basic_openai_responses_api_non_streaming_with_logging():
litellm._turn_on_debug()
litellm.set_verbose = True
test_custom_logger = TestCustomLogger()
litellm.callbacks = [test_custom_logger]
request_model = "gpt-4o"
response = await litellm.aresponses(
model=request_model,
input="hi",
)
print("litellm response=", json.dumps(response, indent=4, default=str))
print("response hidden params=", response._hidden_params)
print("sleeping for 2 seconds...")
await asyncio.sleep(2)
print(
"standard logging payload=",
json.dumps(test_custom_logger.standard_logging_object, indent=4, default=str),
)
assert response is not None
assert test_custom_logger.standard_logging_object is not None
validate_standard_logging_payload(
test_custom_logger.standard_logging_object, response, request_model
)
def validate_stream_event(event):
"""
Validate that a streaming event from litellm.responses() or litellm.aresponses()
with stream=True conforms to the expected structure based on its event type.
Args:
event: The streaming event object to validate
Raises:
AssertionError: If the event doesn't match the expected structure for its type
"""
# Common validation for all event types
assert hasattr(event, "type"), "Event should have a 'type' attribute"
# Type-specific validation
if event.type == "response.created" or event.type == "response.in_progress":
assert hasattr(
event, "response"
), f"{event.type} event should have a 'response' attribute"
validate_responses_api_response(event.response, final_chunk=False)
elif event.type == "response.completed":
assert hasattr(
event, "response"
), "response.completed event should have a 'response' attribute"
validate_responses_api_response(event.response, final_chunk=True)
# Usage is guaranteed only on the completed event
assert (
"usage" in event.response
), "response.completed event should have usage information"
print("Usage in event.response=", event.response["usage"])
assert isinstance(event.response["usage"], ResponseAPIUsage)
elif event.type == "response.failed" or event.type == "response.incomplete":
assert hasattr(
event, "response"
), f"{event.type} event should have a 'response' attribute"
elif (
event.type == "response.output_item.added"
or event.type == "response.output_item.done"
):
assert hasattr(
event, "output_index"
), f"{event.type} event should have an 'output_index' attribute"
assert hasattr(
event, "item"
), f"{event.type} event should have an 'item' attribute"
elif (
event.type == "response.content_part.added"
or event.type == "response.content_part.done"
):
assert hasattr(
event, "item_id"
), f"{event.type} event should have an 'item_id' attribute"
assert hasattr(
event, "output_index"
), f"{event.type} event should have an 'output_index' attribute"
assert hasattr(
event, "content_index"
), f"{event.type} event should have a 'content_index' attribute"
assert hasattr(
event, "part"
), f"{event.type} event should have a 'part' attribute"
elif event.type == "response.output_text.delta":
assert hasattr(
event, "item_id"
), f"{event.type} event should have an 'item_id' attribute"
assert hasattr(
event, "output_index"
), f"{event.type} event should have an 'output_index' attribute"
assert hasattr(
event, "content_index"
), f"{event.type} event should have a 'content_index' attribute"
assert hasattr(
event, "delta"
), f"{event.type} event should have a 'delta' attribute"
elif event.type == "response.output_text.annotation.added":
assert hasattr(
event, "item_id"
), f"{event.type} event should have an 'item_id' attribute"
assert hasattr(
event, "output_index"
), f"{event.type} event should have an 'output_index' attribute"
assert hasattr(
event, "content_index"
), f"{event.type} event should have a 'content_index' attribute"
assert hasattr(
event, "annotation_index"
), f"{event.type} event should have an 'annotation_index' attribute"
assert hasattr(
event, "annotation"
), f"{event.type} event should have an 'annotation' attribute"
elif event.type == "response.output_text.done":
assert hasattr(
event, "item_id"
), f"{event.type} event should have an 'item_id' attribute"
assert hasattr(
event, "output_index"
), f"{event.type} event should have an 'output_index' attribute"
assert hasattr(
event, "content_index"
), f"{event.type} event should have a 'content_index' attribute"
assert hasattr(
event, "text"
), f"{event.type} event should have a 'text' attribute"
elif event.type == "response.refusal.delta":
assert hasattr(
event, "item_id"
), f"{event.type} event should have an 'item_id' attribute"
assert hasattr(
event, "output_index"
), f"{event.type} event should have an 'output_index' attribute"
assert hasattr(
event, "content_index"
), f"{event.type} event should have a 'content_index' attribute"
assert hasattr(
event, "delta"
), f"{event.type} event should have a 'delta' attribute"
elif event.type == "response.refusal.done":
assert hasattr(
event, "item_id"
), f"{event.type} event should have an 'item_id' attribute"
assert hasattr(
event, "output_index"
), f"{event.type} event should have an 'output_index' attribute"
assert hasattr(
event, "content_index"
), f"{event.type} event should have a 'content_index' attribute"
assert hasattr(
event, "refusal"
), f"{event.type} event should have a 'refusal' attribute"
elif event.type == "response.function_call_arguments.delta":
assert hasattr(
event, "item_id"
), f"{event.type} event should have an 'item_id' attribute"
assert hasattr(
event, "output_index"
), f"{event.type} event should have an 'output_index' attribute"
assert hasattr(
event, "delta"
), f"{event.type} event should have a 'delta' attribute"
elif event.type == "response.function_call_arguments.done":
assert hasattr(
event, "item_id"
), f"{event.type} event should have an 'item_id' attribute"
assert hasattr(
event, "output_index"
), f"{event.type} event should have an 'output_index' attribute"
assert hasattr(
event, "arguments"
), f"{event.type} event should have an 'arguments' attribute"
elif event.type in [
"response.file_search_call.in_progress",
"response.file_search_call.searching",
"response.file_search_call.completed",
"response.web_search_call.in_progress",
"response.web_search_call.searching",
"response.web_search_call.completed",
]:
assert hasattr(
event, "output_index"
), f"{event.type} event should have an 'output_index' attribute"
assert hasattr(
event, "item_id"
), f"{event.type} event should have an 'item_id' attribute"
elif event.type == "error":
assert hasattr(
event, "message"
), "Error event should have a 'message' attribute"
return True # Return True if validation passes
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_openai_responses_api_streaming_validation(sync_mode):
"""Test that validates each streaming event from the responses API"""
litellm._turn_on_debug()
event_types_seen = set()
if sync_mode:
response = litellm.responses(
model="gpt-4o",
input="Tell me about artificial intelligence in 3 sentences.",
stream=True,
)
for event in response:
print(f"Validating event type: {event.type}")
validate_stream_event(event)
event_types_seen.add(event.type)
else:
response = await litellm.aresponses(
model="gpt-4o",
input="Tell me about artificial intelligence in 3 sentences.",
stream=True,
)
async for event in response:
print(f"Validating event type: {event.type}")
validate_stream_event(event)
event_types_seen.add(event.type)
# At minimum, we should see these core event types
required_events = {"response.created", "response.completed"}
missing_events = required_events - event_types_seen
assert not missing_events, f"Missing required event types: {missing_events}"
print(f"Successfully validated all event types: {event_types_seen}")

View file

@@ -992,8 +992,8 @@ def test_anthropic_thinking_output(model):
@pytest.mark.parametrize( @pytest.mark.parametrize(
"model", "model",
[ [
"anthropic/claude-3-7-sonnet-20250219", # "anthropic/claude-3-7-sonnet-20250219",
# "bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0", "bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
# "bedrock/invoke/us.anthropic.claude-3-7-sonnet-20250219-v1:0", # "bedrock/invoke/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
], ],
) )
@@ -1011,8 +1011,11 @@ def test_anthropic_thinking_output_stream(model):
reasoning_content_exists = False reasoning_content_exists = False
signature_block_exists = False signature_block_exists = False
tool_call_exists = False
for chunk in resp: for chunk in resp:
print(f"chunk 2: {chunk}") print(f"chunk 2: {chunk}")
if chunk.choices[0].delta.tool_calls:
tool_call_exists = True
if ( if (
hasattr(chunk.choices[0].delta, "thinking_blocks") hasattr(chunk.choices[0].delta, "thinking_blocks")
and chunk.choices[0].delta.thinking_blocks is not None and chunk.choices[0].delta.thinking_blocks is not None
@@ -1025,6 +1028,7 @@ def test_anthropic_thinking_output_stream(model):
print(chunk.choices[0].delta.thinking_blocks[0]) print(chunk.choices[0].delta.thinking_blocks[0])
if chunk.choices[0].delta.thinking_blocks[0].get("signature"): if chunk.choices[0].delta.thinking_blocks[0].get("signature"):
signature_block_exists = True signature_block_exists = True
assert not tool_call_exists
assert reasoning_content_exists assert reasoning_content_exists
assert signature_block_exists assert signature_block_exists
except litellm.Timeout: except litellm.Timeout:

View file

@@ -956,7 +956,7 @@ def test_bedrock_ptu():
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_bedrock_extra_headers(): async def test_bedrock_custom_api_base():
""" """
Check if a url with 'modelId' passed in, is created correctly Check if a url with 'modelId' passed in, is created correctly
@@ -994,6 +994,44 @@ async def test_bedrock_extra_headers():
mock_client_post.assert_called_once() mock_client_post.assert_called_once()
@pytest.mark.parametrize(
"model",
[
"anthropic.claude-3-sonnet-20240229-v1:0",
"bedrock/invoke/anthropic.claude-3-sonnet-20240229-v1:0",
],
)
@pytest.mark.asyncio
async def test_bedrock_extra_headers(model):
"""
Relevant Issue: https://github.com/BerriAI/litellm/issues/9106
"""
client = AsyncHTTPHandler()
with patch.object(client, "post", new=AsyncMock()) as mock_client_post:
litellm.set_verbose = True
from openai.types.chat import ChatCompletion
try:
response = await litellm.acompletion(
model=model,
messages=[{"role": "user", "content": "What's AWS?"}],
client=client,
extra_headers={"test": "hello world", "Authorization": "my-test-key"},
)
except Exception as e:
print(f"error: {e}")
print(f"mock_client_post.call_args.kwargs: {mock_client_post.call_args.kwargs}")
assert "test" in mock_client_post.call_args.kwargs["headers"]
assert mock_client_post.call_args.kwargs["headers"]["test"] == "hello world"
assert (
mock_client_post.call_args.kwargs["headers"]["Authorization"]
== "my-test-key"
)
mock_client_post.assert_called_once()
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_bedrock_custom_prompt_template(): async def test_bedrock_custom_prompt_template():
""" """

View file

@@ -1205,3 +1205,35 @@ def test_context_window_exceeded_error_from_litellm_proxy():
} }
with pytest.raises(litellm.ContextWindowExceededError): with pytest.raises(litellm.ContextWindowExceededError):
extract_and_raise_litellm_exception(**args) extract_and_raise_litellm_exception(**args)
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.parametrize("stream_mode", [True, False])
@pytest.mark.parametrize("model", ["azure/gpt-4o"]) # "gpt-4o-mini",
@pytest.mark.asyncio
async def test_exception_bubbling_up(sync_mode, stream_mode, model):
"""
make sure code, param, and type are bubbled up
"""
import litellm
litellm.set_verbose = True
with pytest.raises(Exception) as exc_info:
if sync_mode:
litellm.completion(
model=model,
messages=[{"role": "usera", "content": "hi"}],
stream=stream_mode,
sync_stream=sync_mode,
)
else:
await litellm.acompletion(
model=model,
messages=[{"role": "usera", "content": "hi"}],
stream=stream_mode,
sync_stream=sync_mode,
)
assert exc_info.value.code == "invalid_value"
assert exc_info.value.param is not None
assert exc_info.value.type == "invalid_request_error"

View file

@@ -329,3 +329,71 @@ async def test_aaapass_through_endpoint_pass_through_keys_langfuse(
setattr( setattr(
litellm.proxy.proxy_server, "proxy_logging_obj", original_proxy_logging_obj litellm.proxy.proxy_server, "proxy_logging_obj", original_proxy_logging_obj
) )
@pytest.mark.asyncio
async def test_pass_through_endpoint_bing(client, monkeypatch):
import litellm
captured_requests = []
async def mock_bing_request(*args, **kwargs):
captured_requests.append((args, kwargs))
mock_response = httpx.Response(
200,
json={
"_type": "SearchResponse",
"queryContext": {"originalQuery": "bob barker"},
"webPages": {
"webSearchUrl": "https://www.bing.com/search?q=bob+barker",
"totalEstimatedMatches": 12000000,
"value": [],
},
},
)
mock_response.request = Mock(spec=httpx.Request)
return mock_response
monkeypatch.setattr("httpx.AsyncClient.request", mock_bing_request)
# Define a pass-through endpoint
pass_through_endpoints = [
{
"path": "/bing/search",
"target": "https://api.bing.microsoft.com/v7.0/search?setLang=en-US&mkt=en-US",
"headers": {"Ocp-Apim-Subscription-Key": "XX"},
"forward_headers": True,
# Additional settings
"merge_query_params": True,
"auth": True,
},
{
"path": "/bing/search-no-merge-params",
"target": "https://api.bing.microsoft.com/v7.0/search?setLang=en-US&mkt=en-US",
"headers": {"Ocp-Apim-Subscription-Key": "XX"},
"forward_headers": True,
},
]
# Initialize the pass-through endpoint
await initialize_pass_through_endpoints(pass_through_endpoints)
general_settings: Optional[dict] = (
getattr(litellm.proxy.proxy_server, "general_settings", {}) or {}
)
general_settings.update({"pass_through_endpoints": pass_through_endpoints})
setattr(litellm.proxy.proxy_server, "general_settings", general_settings)
# Make 2 requests through the pass-through endpoint
client.get("/bing/search?q=bob+barker")
client.get("/bing/search-no-merge-params?q=bob+barker")
first_transformed_url = captured_requests[0][1]["url"]
second_transformed_url = captured_requests[1][1]["url"]
# Assert the response
assert (
first_transformed_url
== "https://api.bing.microsoft.com/v7.0/search?q=bob+barker&setLang=en-US&mkt=en-US"
and second_transformed_url
== "https://api.bing.microsoft.com/v7.0/search?setLang=en-US&mkt=en-US"
)

View file

@@ -9,7 +9,7 @@ from typing import Any, Optional, List, Literal
async def generate_key( async def generate_key(
session, models: Optional[List[str]] = None, team_id: Optional[str] = None session, models: Optional[List[str]] = None, team_id: Optional[str] = None
): ):
"""Helper function to generate a key with specific model access""" """Helper function to generate a key with specific model access controls"""
url = "http://0.0.0.0:4000/key/generate" url = "http://0.0.0.0:4000/key/generate"
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"} headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
data = {} data = {}
@@ -94,7 +94,7 @@ async def test_model_access_patterns(key_models, test_model, expect_success):
assert _error_body["type"] == "key_model_access_denied" assert _error_body["type"] == "key_model_access_denied"
assert _error_body["param"] == "model" assert _error_body["param"] == "model"
assert _error_body["code"] == "401" assert _error_body["code"] == "401"
assert "API Key not allowed to access model" in _error_body["message"] assert "key not allowed to access model" in _error_body["message"]
@pytest.mark.asyncio @pytest.mark.asyncio
@@ -159,12 +159,6 @@ async def test_model_access_update():
"team_models, test_model, expect_success", "team_models, test_model, expect_success",
[ [
(["openai/*"], "anthropic/claude-2", False), # Non-matching model (["openai/*"], "anthropic/claude-2", False), # Non-matching model
(["gpt-4"], "gpt-4", True), # Exact model match
(["bedrock/*"], "bedrock/anthropic.claude-3", True), # Bedrock wildcard
(["bedrock/anthropic.*"], "bedrock/anthropic.claude-3", True), # Pattern match
(["bedrock/anthropic.*"], "bedrock/amazon.titan", False), # Pattern non-match
(None, "gpt-4", True), # No model restrictions
([], "gpt-4", True), # Empty model list
], ],
) )
@pytest.mark.asyncio @pytest.mark.asyncio
@@ -285,6 +279,6 @@ def _validate_model_access_exception(
assert _error_body["param"] == "model" assert _error_body["param"] == "model"
assert _error_body["code"] == "401" assert _error_body["code"] == "401"
if expected_type == "key_model_access_denied": if expected_type == "key_model_access_denied":
assert "API Key not allowed to access model" in _error_body["message"] assert "key not allowed to access model" in _error_body["message"]
elif expected_type == "team_model_access_denied": elif expected_type == "team_model_access_denied":
assert "Team not allowed to access model" in _error_body["message"] assert "eam not allowed to access model" in _error_body["message"]

View file

@@ -27,7 +27,7 @@ from litellm.proxy._types import (
) )
from litellm.proxy.utils import PrismaClient from litellm.proxy.utils import PrismaClient
from litellm.proxy.auth.auth_checks import ( from litellm.proxy.auth.auth_checks import (
_team_model_access_check, can_team_access_model,
_virtual_key_soft_budget_check, _virtual_key_soft_budget_check,
) )
from litellm.proxy.utils import ProxyLogging from litellm.proxy.utils import ProxyLogging
@@ -427,9 +427,9 @@
], ],
) )
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_team_model_access_check(model, team_models, expect_to_work): async def test_can_team_access_model(model, team_models, expect_to_work):
""" """
Test cases for _team_model_access_check: Test cases for can_team_access_model:
1. Exact model match 1. Exact model match
2. all-proxy-models access 2. all-proxy-models access
3. Wildcard (*) access 3. Wildcard (*) access
@@ -438,16 +438,16 @@ async def test_team_model_access_check(model, team_models, expect_to_work):
6. Empty model list 6. Empty model list
7. None model list 7. None model list
""" """
try:
team_object = LiteLLM_TeamTable( team_object = LiteLLM_TeamTable(
team_id="test-team", team_id="test-team",
models=team_models, models=team_models,
) )
result = await can_team_access_model(
try:
_team_model_access_check(
model=model, model=model,
team_object=team_object, team_object=team_object,
llm_router=None, llm_router=None,
team_model_aliases=None,
) )
if not expect_to_work: if not expect_to_work:
pytest.fail( pytest.fail(

View file

@@ -64,7 +64,7 @@ def test_load_config_with_custom_role_names():
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_token_single_public_key(): async def test_token_single_public_key(monkeypatch):
import jwt import jwt
jwt_handler = JWTHandler() jwt_handler = JWTHandler()
@@ -80,10 +80,15 @@ async def test_token_single_public_key():
] ]
} }
monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key")
# set cache # set cache
cache = DualCache() cache = DualCache()
await cache.async_set_cache(key="litellm_jwt_auth_keys", value=backend_keys["keys"]) await cache.async_set_cache(
key="litellm_jwt_auth_keys_https://example.com/public-key",
value=backend_keys["keys"],
)
jwt_handler.user_api_key_cache = cache jwt_handler.user_api_key_cache = cache
@@ -99,7 +104,7 @@ async def test_token_single_public_key():
@pytest.mark.parametrize("audience", [None, "litellm-proxy"]) @pytest.mark.parametrize("audience", [None, "litellm-proxy"])
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_valid_invalid_token(audience): async def test_valid_invalid_token(audience, monkeypatch):
""" """
Tests Tests
- valid token - valid token
@@ -116,6 +121,8 @@ async def test_valid_invalid_token(audience):
if audience: if audience:
os.environ["JWT_AUDIENCE"] = audience os.environ["JWT_AUDIENCE"] = audience
monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key")
# Generate a private / public key pair using RSA algorithm # Generate a private / public key pair using RSA algorithm
key = rsa.generate_private_key( key = rsa.generate_private_key(
public_exponent=65537, key_size=2048, backend=default_backend() public_exponent=65537, key_size=2048, backend=default_backend()
@@ -145,7 +152,9 @@ async def test_valid_invalid_token(audience):
# set cache # set cache
cache = DualCache() cache = DualCache()
await cache.async_set_cache(key="litellm_jwt_auth_keys", value=[public_jwk]) await cache.async_set_cache(
key="litellm_jwt_auth_keys_https://example.com/public-key", value=[public_jwk]
)
jwt_handler = JWTHandler() jwt_handler = JWTHandler()
@@ -294,7 +303,7 @@ def team_token_tuple():
@pytest.mark.parametrize("audience", [None, "litellm-proxy"]) @pytest.mark.parametrize("audience", [None, "litellm-proxy"])
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_team_token_output(prisma_client, audience): async def test_team_token_output(prisma_client, audience, monkeypatch):
import json import json
import uuid import uuid
@@ -316,6 +325,8 @@ async def test_team_token_output(prisma_client, audience):
if audience: if audience:
os.environ["JWT_AUDIENCE"] = audience os.environ["JWT_AUDIENCE"] = audience
monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key")
# Generate a private / public key pair using RSA algorithm # Generate a private / public key pair using RSA algorithm
key = rsa.generate_private_key( key = rsa.generate_private_key(
public_exponent=65537, key_size=2048, backend=default_backend() public_exponent=65537, key_size=2048, backend=default_backend()
@@ -345,7 +356,9 @@ async def test_team_token_output(prisma_client, audience):
# set cache # set cache
cache = DualCache() cache = DualCache()
await cache.async_set_cache(key="litellm_jwt_auth_keys", value=[public_jwk]) await cache.async_set_cache(
key="litellm_jwt_auth_keys_https://example.com/public-key", value=[public_jwk]
)
jwt_handler = JWTHandler() jwt_handler = JWTHandler()
@@ -463,7 +476,7 @@ async def test_team_token_output(prisma_client, audience):
@pytest.mark.parametrize("user_id_upsert", [True, False]) @pytest.mark.parametrize("user_id_upsert", [True, False])
@pytest.mark.asyncio @pytest.mark.asyncio
async def aaaatest_user_token_output( async def aaaatest_user_token_output(
prisma_client, audience, team_id_set, default_team_id, user_id_upsert prisma_client, audience, team_id_set, default_team_id, user_id_upsert, monkeypatch
): ):
import uuid import uuid
@@ -528,10 +541,14 @@ async def aaaatest_user_token_output(
assert isinstance(public_jwk, dict) assert isinstance(public_jwk, dict)
monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key")
# set cache # set cache
cache = DualCache() cache = DualCache()
await cache.async_set_cache(key="litellm_jwt_auth_keys", value=[public_jwk]) await cache.async_set_cache(
key="litellm_jwt_auth_keys_https://example.com/public-key", value=[public_jwk]
)
jwt_handler = JWTHandler() jwt_handler = JWTHandler()
@@ -699,7 +716,9 @@ async def aaaatest_user_token_output(
@pytest.mark.parametrize("admin_allowed_routes", [None, ["ui_routes"]]) @pytest.mark.parametrize("admin_allowed_routes", [None, ["ui_routes"]])
@pytest.mark.parametrize("audience", [None, "litellm-proxy"]) @pytest.mark.parametrize("audience", [None, "litellm-proxy"])
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_allowed_routes_admin(prisma_client, audience, admin_allowed_routes): async def test_allowed_routes_admin(
prisma_client, audience, admin_allowed_routes, monkeypatch
):
""" """
Add a check to make sure jwt proxy admin scope can access all allowed admin routes Add a check to make sure jwt proxy admin scope can access all allowed admin routes
@@ -723,6 +742,8 @@ async def test_allowed_routes_admin(prisma_client, audience, admin_allowed_route
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
await litellm.proxy.proxy_server.prisma_client.connect() await litellm.proxy.proxy_server.prisma_client.connect()
monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key")
os.environ.pop("JWT_AUDIENCE", None) os.environ.pop("JWT_AUDIENCE", None)
if audience: if audience:
os.environ["JWT_AUDIENCE"] = audience os.environ["JWT_AUDIENCE"] = audience
@@ -756,7 +777,9 @@ async def test_allowed_routes_admin(prisma_client, audience, admin_allowed_route
# set cache # set cache
cache = DualCache() cache = DualCache()
await cache.async_set_cache(key="litellm_jwt_auth_keys", value=[public_jwk]) await cache.async_set_cache(
key="litellm_jwt_auth_keys_https://example.com/public-key", value=[public_jwk]
)
jwt_handler = JWTHandler() jwt_handler = JWTHandler()
@@ -910,7 +933,9 @@ def mock_user_object(*args, **kwargs):
"user_email, should_work", [("ishaan@berri.ai", True), ("krrish@tassle.xyz", False)] "user_email, should_work", [("ishaan@berri.ai", True), ("krrish@tassle.xyz", False)]
) )
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_allow_access_by_email(public_jwt_key, user_email, should_work): async def test_allow_access_by_email(
public_jwt_key, user_email, should_work, monkeypatch
):
""" """
Allow anyone with an `@xyz.com` email to make a request to the proxy. Allow anyone with an `@xyz.com` email to make a request to the proxy.
@@ -925,10 +950,14 @@ async def test_allow_access_by_email(public_jwt_key, user_email, should_work):
public_jwk = public_jwt_key["public_jwk"] public_jwk = public_jwt_key["public_jwk"]
private_key = public_jwt_key["private_key"] private_key = public_jwt_key["private_key"]
monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key")
# set cache # set cache
cache = DualCache() cache = DualCache()
await cache.async_set_cache(key="litellm_jwt_auth_keys", value=[public_jwk]) await cache.async_set_cache(
key="litellm_jwt_auth_keys_https://example.com/public-key", value=[public_jwk]
)
jwt_handler = JWTHandler() jwt_handler = JWTHandler()
@@ -1074,7 +1103,7 @@ async def test_end_user_jwt_auth(monkeypatch):
] ]
cache.set_cache( cache.set_cache(
key="litellm_jwt_auth_keys", key="litellm_jwt_auth_keys_https://example.com/public-key",
value=keys, value=keys,
) )
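The common thread in the hunks above is that the JWT public keys are now cached under a key suffixed with the configured JWT_PUBLIC_KEY_URL, and each test sets that env var via monkeypatch. A rough sketch of the shared setup pattern, with the import paths assumed rather than taken from this diff:

    from litellm.caching import DualCache
    from litellm.proxy.auth.handle_jwt import JWTHandler

    async def build_jwt_handler(monkeypatch, public_jwk: dict) -> JWTHandler:
        # cache key is scoped to the public key URL the proxy is configured with
        monkeypatch.setenv("JWT_PUBLIC_KEY_URL", "https://example.com/public-key")
        cache = DualCache()
        await cache.async_set_cache(
            key="litellm_jwt_auth_keys_https://example.com/public-key",
            value=[public_jwk],
        )
        jwt_handler = JWTHandler()
        jwt_handler.user_api_key_cache = cache
        return jwt_handler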

View file

@@ -826,7 +826,7 @@ async def test_jwt_user_api_key_auth_builder_enforce_rbac(enforce_rbac, monkeypa
] ]
local_cache.set_cache( local_cache.set_cache(
key="litellm_jwt_auth_keys", key="litellm_jwt_auth_keys_my-fake-url",
value=keys, value=keys,
) )

View file

@@ -308,7 +308,7 @@ async def test_chat_completion():
model="gpt-4", model="gpt-4",
messages=[{"role": "user", "content": "Hello!"}], messages=[{"role": "user", "content": "Hello!"}],
) )
assert "API Key not allowed to access model." in str(e) assert "key not allowed to access model." in str(e)
@pytest.mark.asyncio @pytest.mark.asyncio
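The assertion above now matches the lower-cased error text returned by the proxy. A hedged sketch of the surrounding test pattern, assuming an AsyncOpenAI client pointed at a locally running proxy and a hypothetical virtual key that is not allowed to call gpt-4:

    import openai
    import pytest

    @pytest.mark.asyncio
    async def test_restricted_key_model_access_sketch():
        client = openai.AsyncOpenAI(
            api_key="sk-restricted-key",       # hypothetical virtual key
            base_url="http://0.0.0.0:4000",    # hypothetical local proxy
        )
        with pytest.raises(Exception) as e:
            await client.chat.completions.create(
                model="gpt-4",
                messages=[{"role": "user", "content": "Hello!"}],
            )
        # updated message: "key not allowed to access model."
        assert "key not allowed to access model." in str(e)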

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/f41c66e22715ab00.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[92222,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"441\",\"static/chunks/441-79926bf2b9d89e04.js\",\"261\",\"static/chunks/261-e48c2ac6ff0b811c.js\",\"899\",\"static/chunks/899-354f59ecde307dfa.js\",\"914\",\"static/chunks/914-e17acab83d0eadb5.js\",\"250\",\"static/chunks/250-51513f2f6dabf571.js\",\"699\",\"static/chunks/699-6b82f8e7b98ca1a3.js\",\"931\",\"static/chunks/app/page-b36633214e76cfd1.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"rCxUxULLkHhl5KoPY9DHv\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f41c66e22715ab00.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html> <!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" 
async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/b6d997482399c7e1.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[62177,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"441\",\"static/chunks/441-79926bf2b9d89e04.js\",\"261\",\"static/chunks/261-cb27c20c4f8ec4c6.js\",\"899\",\"static/chunks/899-354f59ecde307dfa.js\",\"157\",\"static/chunks/157-cf7bc8b3ae1b80ba.js\",\"250\",\"static/chunks/250-51513f2f6dabf571.js\",\"699\",\"static/chunks/699-6b82f8e7b98ca1a3.js\",\"931\",\"static/chunks/app/page-a25b75c267486fe2.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"i92Qc9kkJSCtCgV3DDmdu\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/b6d997482399c7e1.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be 
found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>

View file

@@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"] 2:I[19107,[],"ClientPageRoot"]
3:I[92222,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-e48c2ac6ff0b811c.js","899","static/chunks/899-354f59ecde307dfa.js","914","static/chunks/914-e17acab83d0eadb5.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","931","static/chunks/app/page-b36633214e76cfd1.js"],"default",1] 3:I[62177,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","899","static/chunks/899-354f59ecde307dfa.js","157","static/chunks/157-cf7bc8b3ae1b80ba.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","931","static/chunks/app/page-a25b75c267486fe2.js"],"default",1]
4:I[4707,[],""] 4:I[4707,[],""]
5:I[36423,[],""] 5:I[36423,[],""]
0:["rCxUxULLkHhl5KoPY9DHv",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]] 0:["i92Qc9kkJSCtCgV3DDmdu",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/b6d997482399c7e1.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

File diff suppressed because one or more lines are too long

View file

@@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"] 2:I[19107,[],"ClientPageRoot"]
3:I[52829,["441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-e48c2ac6ff0b811c.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","418","static/chunks/app/model_hub/page-6f97b95f1023b0e9.js"],"default",1] 3:I[52829,["441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","418","static/chunks/app/model_hub/page-6f97b95f1023b0e9.js"],"default",1]
4:I[4707,[],""] 4:I[4707,[],""]
5:I[36423,[],""] 5:I[36423,[],""]
0:["rCxUxULLkHhl5KoPY9DHv",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]] 
0:["i92Qc9kkJSCtCgV3DDmdu",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/b6d997482399c7e1.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

File diff suppressed because one or more lines are too long

View file

@@ -2,6 +2,6 @@
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","441","static/chunks/441-79926bf2b9d89e04.js","899","static/chunks/899-354f59ecde307dfa.js","250","static/chunks/250-51513f2f6dabf571.js","461","static/chunks/app/onboarding/page-a31bc08c35f01c0a.js"],"default",1] 3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","441","static/chunks/441-79926bf2b9d89e04.js","899","static/chunks/899-354f59ecde307dfa.js","250","static/chunks/250-51513f2f6dabf571.js","461","static/chunks/app/onboarding/page-a31bc08c35f01c0a.js"],"default",1]
4:I[4707,[],""] 4:I[4707,[],""]
5:I[36423,[],""] 5:I[36423,[],""]
0:["rCxUxULLkHhl5KoPY9DHv",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]] 
0:["i92Qc9kkJSCtCgV3DDmdu",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/b6d997482399c7e1.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@@ -215,6 +215,7 @@ export default function CreateKeyPage() {
userEmail={userEmail} userEmail={userEmail}
setProxySettings={setProxySettings} setProxySettings={setProxySettings}
proxySettings={proxySettings} proxySettings={proxySettings}
accessToken={accessToken}
/> />
<div className="flex flex-1 overflow-auto"> <div className="flex flex-1 overflow-auto">
<div className="mt-8"> <div className="mt-8">

View file

@@ -23,7 +23,7 @@ const ProviderSpecificFields: React.FC<ProviderSpecificFieldsProps> = ({
console.log(`type of selectedProviderEnum: ${typeof selectedProviderEnum}`); console.log(`type of selectedProviderEnum: ${typeof selectedProviderEnum}`);
return ( return (
<> <>
{selectedProviderEnum === Providers.OpenAI && ( {(selectedProviderEnum === Providers.OpenAI || selectedProviderEnum === Providers.OpenAI_Text) && (
<> <>
<Form.Item <Form.Item
label="API Base" label="API Base"
@@ -99,7 +99,8 @@ const ProviderSpecificFields: React.FC<ProviderSpecificFieldsProps> = ({
{(selectedProviderEnum === Providers.Azure || {(selectedProviderEnum === Providers.Azure ||
selectedProviderEnum === Providers.Azure_AI_Studio || selectedProviderEnum === Providers.Azure_AI_Studio ||
selectedProviderEnum === Providers.OpenAI_Compatible selectedProviderEnum === Providers.OpenAI_Compatible ||
selectedProviderEnum === Providers.OpenAI_Text_Compatible
) && ( ) && (
<Form.Item <Form.Item
rules={[{ required: true, message: "Required" }]} rules={[{ required: true, message: "Required" }]}

View file

@@ -39,6 +39,9 @@ import { InfoCircleOutlined } from '@ant-design/icons';
import { Tooltip } from 'antd'; import { Tooltip } from 'antd';
import Createuser from "./create_user_button"; import Createuser from "./create_user_button";
import debounce from 'lodash/debounce'; import debounce from 'lodash/debounce';
import { rolesWithWriteAccess } from '../utils/roles';
const { Option } = Select; const { Option } = Select;
@@ -335,9 +338,11 @@ const CreateKey: React.FC<CreateKeyProps> = ({
return ( return (
<div> <div>
{userRole && rolesWithWriteAccess.includes(userRole) && (
<Button className="mx-auto" onClick={() => setIsModalVisible(true)}> <Button className="mx-auto" onClick={() => setIsModalVisible(true)}>
+ Create New Key + Create New Key
</Button> </Button>
)}
<Modal <Modal
// title="Create Key" // title="Create Key"
visible={isModalVisible} visible={isModalVisible}

View file

@@ -21,6 +21,7 @@ import { KeyResponse } from "./key_team_helpers/key_list";
import { Form, Input, InputNumber, message, Select } from "antd"; import { Form, Input, InputNumber, message, Select } from "antd";
import { KeyEditView } from "./key_edit_view"; import { KeyEditView } from "./key_edit_view";
import { RegenerateKeyModal } from "./regenerate_key_modal"; import { RegenerateKeyModal } from "./regenerate_key_modal";
import { rolesWithWriteAccess } from '../utils/roles';
interface KeyInfoViewProps { interface KeyInfoViewProps {
keyId: string; keyId: string;
@@ -128,6 +129,7 @@ export default function KeyInfoView({ keyId, onClose, keyData, accessToken, user
<Title>{keyData.key_alias || "API Key"}</Title> <Title>{keyData.key_alias || "API Key"}</Title>
<Text className="text-gray-500 font-mono">{keyData.token}</Text> <Text className="text-gray-500 font-mono">{keyData.token}</Text>
</div> </div>
{userRole && rolesWithWriteAccess.includes(userRole) && (
<div className="flex gap-2"> <div className="flex gap-2">
<Button <Button
icon={RefreshIcon} icon={RefreshIcon}
@@ -146,6 +148,7 @@ export default function KeyInfoView({ keyId, onClose, keyData, accessToken, user
Delete Key Delete Key
</Button> </Button>
</div> </div>
)}
</div> </div>
{/* Add RegenerateKeyModal */} {/* Add RegenerateKeyModal */}
@@ -246,7 +249,7 @@ export default function KeyInfoView({ keyId, onClose, keyData, accessToken, user
<Card> <Card>
<div className="flex justify-between items-center mb-4"> <div className="flex justify-between items-center mb-4">
<Title>Key Settings</Title> <Title>Key Settings</Title>
{!isEditing && ( {!isEditing && userRole && rolesWithWriteAccess.includes(userRole) && (
<Button variant="light" onClick={() => setIsEditing(true)}> <Button variant="light" onClick={() => setIsEditing(true)}>
Edit Settings Edit Settings
</Button> </Button>

View file

@@ -21,7 +21,7 @@ import {
ExperimentOutlined, ExperimentOutlined,
ThunderboltOutlined, ThunderboltOutlined,
} from '@ant-design/icons'; } from '@ant-design/icons';
import { old_admin_roles, v2_admin_role_names, all_admin_roles, rolesAllowedToSeeUsage } from '../utils/roles'; import { old_admin_roles, v2_admin_role_names, all_admin_roles, rolesAllowedToSeeUsage, rolesWithWriteAccess } from '../utils/roles';
const { Sider } = Layout; const { Sider } = Layout;
@@ -45,7 +45,7 @@ interface MenuItem {
// Note: If a menu item does not have a role, it is visible to all roles. // Note: If a menu item does not have a role, it is visible to all roles.
const menuItems: MenuItem[] = [ const menuItems: MenuItem[] = [
{ key: "1", page: "api-keys", label: "Virtual Keys", icon: <KeyOutlined /> }, { key: "1", page: "api-keys", label: "Virtual Keys", icon: <KeyOutlined /> },
{ key: "3", page: "llm-playground", label: "Test Key", icon: <PlayCircleOutlined /> }, { key: "3", page: "llm-playground", label: "Test Key", icon: <PlayCircleOutlined />, roles: rolesWithWriteAccess },
{ key: "2", page: "models", label: "Models", icon: <BlockOutlined />, roles: all_admin_roles }, { key: "2", page: "models", label: "Models", icon: <BlockOutlined />, roles: all_admin_roles },
{ key: "4", page: "usage", label: "Usage", icon: <BarChartOutlined /> }, { key: "4", page: "usage", label: "Usage", icon: <BarChartOutlined /> },
{ key: "6", page: "teams", label: "Teams", icon: <TeamOutlined /> }, { key: "6", page: "teams", label: "Teams", icon: <TeamOutlined /> },

Some files were not shown because too many files have changed in this diff.