Merge branch 'main' into litellm_fix_team_model_access_checks

This commit is contained in:
Ishaan Jaff 2025-03-01 17:36:45 -08:00
commit f85d5afd58
213 changed files with 7650 additions and 1600 deletions


@ -699,6 +699,7 @@ jobs:
pip install "pytest-cov==5.0.0"
pip install "pytest-asyncio==0.21.1"
pip install "respx==0.21.1"
pip install "hypercorn==0.17.3"
# Run pytest and generate JUnit XML report
- run:
name: Run tests
@ -1981,11 +1982,44 @@ jobs:
- run:
name: Wait for app to be ready
command: dockerize -wait http://localhost:4000 -timeout 5m
# Add Ruby installation and testing before the existing Node.js and Python tests
- run:
name: Install Ruby and Bundler
command: |
# Import GPG keys first
gpg --keyserver hkp://keyserver.ubuntu.com --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3 7D2BAF1CF37B13E2069D6956105BD0E739499BDB || {
curl -sSL https://rvm.io/mpapis.asc | gpg --import -
curl -sSL https://rvm.io/pkuczynski.asc | gpg --import -
}
# Install Ruby version manager (RVM)
curl -sSL https://get.rvm.io | bash -s stable
# Source RVM from the correct location
source $HOME/.rvm/scripts/rvm
# Install Ruby 3.2.2
rvm install 3.2.2
rvm use 3.2.2 --default
# Install latest Bundler
gem install bundler
- run:
name: Run Ruby tests
command: |
source $HOME/.rvm/scripts/rvm
cd tests/pass_through_tests/ruby_passthrough_tests
bundle install
bundle exec rspec
no_output_timeout: 30m
# New steps to run Node.js test
- run:
name: Install Node.js
command: |
export DEBIAN_FRONTEND=noninteractive
curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -
sudo apt-get update
sudo apt-get install -y nodejs
node --version
npm --version


@ -54,27 +54,29 @@ def interpret_results(csv_file):
def _get_docker_run_command_stable_release(release_version):
return f"""
\n\n
## Docker Run LiteLLM Proxy
\n\n
## Docker Run LiteLLM Proxy
```
docker run \\
-e STORE_MODEL_IN_DB=True \\
-p 4000:4000 \\
ghcr.io/berriai/litellm_stable_release_branch-{release_version}
```
docker run \\
-e STORE_MODEL_IN_DB=True \\
-p 4000:4000 \\
ghcr.io/berriai/litellm:litellm_stable_release_branch-{release_version}
```
"""
def _get_docker_run_command(release_version):
return f"""
\n\n
## Docker Run LiteLLM Proxy
\n\n
## Docker Run LiteLLM Proxy
```
docker run \\
-e STORE_MODEL_IN_DB=True \\
-p 4000:4000 \\
ghcr.io/berriai/litellm:main-{release_version}
```
docker run \\
-e STORE_MODEL_IN_DB=True \\
-p 4000:4000 \\
ghcr.io/berriai/litellm:main-{release_version}
```
"""


@ -8,7 +8,7 @@ class MyUser(HttpUser):
def chat_completion(self):
headers = {
"Content-Type": "application/json",
"Authorization": "Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
"Authorization": "Bearer sk-8N1tLOOyH8TIxwOLahhIVg",
# Include any additional headers you may need for authentication, etc.
}

.gitignore

@ -77,3 +77,5 @@ litellm/proxy/_experimental/out/404.html
litellm/proxy/_experimental/out/model_hub.html
.mypy_cache/*
litellm/proxy/application.log
tests/llm_translation/vertex_test_account.json
tests/llm_translation/test_vertex_key.json


@ -40,7 +40,7 @@ LiteLLM manages:
[**Jump to LiteLLM Proxy (LLM Gateway) Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
[**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-providers-docs)
🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12 hour load tests, before being published.
🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12 hour load tests, before being published. [More information about the release cycle here](https://docs.litellm.ai/docs/proxy/release_cycle)
Support for more providers. Missing a provider or LLM Platform, raise a [feature request](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+).


@ -18,7 +18,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.3.0
version: 0.4.1
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to


@ -48,6 +48,23 @@ spec:
{{- end }}
- name: DISABLE_SCHEMA_UPDATE
value: "false" # always run the migration from the Helm PreSync hook, override the value set
{{- with .Values.volumeMounts }}
volumeMounts:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.volumes }}
volumes:
{{- toYaml . | nindent 8 }}
{{- end }}
restartPolicy: OnFailure
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
ttlSecondsAfterFinished: {{ .Values.migrationJob.ttlSecondsAfterFinished }}
backoffLimit: {{ .Values.migrationJob.backoffLimit }}
{{- end }}


@ -187,6 +187,7 @@ migrationJob:
backoffLimit: 4 # Backoff limit for Job restarts
disableSchemaUpdate: false # Skip schema migrations for specific environments. When True, the job will exit with code 0.
annotations: {}
ttlSecondsAfterFinished: 120
# Additional environment variables to be added to the deployment
envVars: {


@ -0,0 +1,95 @@
# OpenAI Passthrough
Pass-through endpoints for `/openai`
## Overview
| Feature | Supported | Notes |
|-------|-------|-------|
| Cost Tracking | ❌ | Not supported |
| Logging | ✅ | Works across all integrations |
| Streaming | ✅ | Fully supported |
### When to use this?
- For 90% of your use cases, you should use the [native LiteLLM OpenAI Integration](https://docs.litellm.ai/docs/providers/openai) (`/chat/completions`, `/embeddings`, `/completions`, `/images`, `/batches`, etc.)
- Use this passthrough to call less popular or newer OpenAI endpoints that LiteLLM doesn't fully support yet, such as `/assistants`, `/threads`, `/vector_stores`
Simply replace `https://api.openai.com` with `LITELLM_PROXY_BASE_URL/openai`
## Usage Examples
### Assistants API
#### Create OpenAI Client
Make sure you do the following:
- Point `base_url` to your `LITELLM_PROXY_BASE_URL/openai`
- Use your `LITELLM_API_KEY` as the `api_key`
```python
import openai
client = openai.OpenAI(
base_url="http://0.0.0.0:4000/openai", # <your-proxy-url>/openai
api_key="sk-anything" # <your-proxy-api-key>
)
```
#### Create an Assistant
```python
# Create an assistant
assistant = client.beta.assistants.create(
name="Math Tutor",
instructions="You are a math tutor. Help solve equations.",
model="gpt-4o",
)
```
#### Create a Thread
```python
# Create a thread
thread = client.beta.threads.create()
```
#### Add a Message to the Thread
```python
# Add a message
message = client.beta.threads.messages.create(
thread_id=thread.id,
role="user",
content="Solve 3x + 11 = 14",
)
```
#### Run the Assistant
```python
# Create a run to get the assistant's response
run = client.beta.threads.runs.create(
thread_id=thread.id,
assistant_id=assistant.id,
)
# Check run status
run_status = client.beta.threads.runs.retrieve(
thread_id=thread.id,
run_id=run.id
)
```
#### Retrieve Messages
```python
# List messages after the run completes
messages = client.beta.threads.messages.list(
thread_id=thread.id
)
```
#### Delete the Assistant
```python
# Delete the assistant when done
client.beta.assistants.delete(assistant.id)
```


@ -377,6 +377,121 @@ print(f"\nResponse: {resp}")
```
## Usage - 'thinking' / 'reasoning content'
This is currently only supported for Anthropic's Claude 3.7 Sonnet + Deepseek R1.
Works on v1.61.20+.
Returns two new fields in the `message` and `delta` objects:
- `reasoning_content` - string - The reasoning content of the response
- `thinking_blocks` - list of objects (Anthropic only) - The thinking blocks of the response
Each object has the following fields:
- `type` - Literal["thinking"] - The type of thinking block
- `thinking` - string - The thinking of the response. Also returned in `reasoning_content`
- `signature_delta` - string - A base64 encoded string, returned by Anthropic.
The `signature_delta` is required by Anthropic on subsequent calls, if 'thinking' content is passed in (only required to use `thinking` with tool calling). [Learn more](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#understanding-thinking-blocks)
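A hedged streaming sketch of reading these new fields (model name and `thinking` param reused from the SDK example below; assumes AWS credentials are set as shown there). Per the note above, `reasoning_content` also appears on `delta` chunks when streaming:

```python
from litellm import completion

stream = completion(
    model="bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    thinking={"type": "enabled", "budget_tokens": 1024},
    stream=True,
)

for chunk in stream:
    delta = chunk.choices[0].delta
    # defensive access: the field is only populated while the model is "thinking"
    if getattr(delta, "reasoning_content", None):
        print(delta.reasoning_content, end="")
```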
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import completion
# set env
os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""
resp = completion(
model="bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
messages=[{"role": "user", "content": "What is the capital of France?"}],
thinking={"type": "enabled", "budget_tokens": 1024},
)
print(resp)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Setup config.yaml
```yaml
model_list:
- model_name: bedrock-claude-3-7
litellm_params:
model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0
thinking: {"type": "enabled", "budget_tokens": 1024} # 👈 EITHER HERE OR ON REQUEST
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml
```
3. Test it!
```bash
curl http://0.0.0.0:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <YOUR-LITELLM-KEY>" \
-d '{
"model": "bedrock-claude-3-7",
"messages": [{"role": "user", "content": "What is the capital of France?"}],
"thinking": {"type": "enabled", "budget_tokens": 1024} # 👈 EITHER HERE OR ON CONFIG.YAML
}'
```
</TabItem>
</Tabs>
**Expected Response**
Same as [Anthropic API response](../providers/anthropic#usage---thinking--reasoning_content).
```python
{
"id": "chatcmpl-c661dfd7-7530-49c9-b0cc-d5018ba4727d",
"created": 1740640366,
"model": "us.anthropic.claude-3-7-sonnet-20250219-v1:0",
"object": "chat.completion",
"system_fingerprint": null,
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "The capital of France is Paris. It's not only the capital city but also the largest city in France, serving as the country's major cultural, economic, and political center.",
"role": "assistant",
"tool_calls": null,
"function_call": null,
"reasoning_content": "The capital of France is Paris. This is a straightforward factual question.",
"thinking_blocks": [
{
"type": "thinking",
"thinking": "The capital of France is Paris. This is a straightforward factual question.",
"signature_delta": "EqoBCkgIARABGAIiQL2UoU0b1OHYi+yCHpBY7U6FQW8/FcoLewocJQPa2HnmLM+NECy50y44F/kD4SULFXi57buI9fAvyBwtyjlOiO0SDE3+r3spdg6PLOo9PBoMma2ku5OTAoR46j9VIjDRlvNmBvff7YW4WI9oU8XagaOBSxLPxElrhyuxppEn7m6bfT40dqBSTDrfiw4FYB4qEPETTI6TA6wtjGAAqmFqKTo="
}
]
}
}
],
"usage": {
"completion_tokens": 64,
"prompt_tokens": 42,
"total_tokens": 106,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
```
## Usage - Bedrock Guardrails
Example of using [Bedrock Guardrails with LiteLLM](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-use-converse-api.html)


@ -23,14 +23,16 @@ import os
os.environ['CEREBRAS_API_KEY'] = ""
response = completion(
model="cerebras/meta/llama3-70b-instruct",
model="cerebras/llama3-70b-instruct",
messages=[
{
"role": "user",
"content": "What's the weather like in Boston today in Fahrenheit?",
"content": "What's the weather like in Boston today in Fahrenheit? (Write in JSON)",
}
],
max_tokens=10,
# The prompt should include JSON if 'json_object' is selected; otherwise, you will get error code 400.
response_format={ "type": "json_object" },
seed=123,
stop=["\n\n"],
@ -50,15 +52,17 @@ import os
os.environ['CEREBRAS_API_KEY'] = ""
response = completion(
model="cerebras/meta/llama3-70b-instruct",
model="cerebras/llama3-70b-instruct",
messages=[
{
"role": "user",
"content": "What's the weather like in Boston today in Fahrenheit?",
"content": "What's the weather like in Boston today in Fahrenheit? (Write in JSON)",
}
],
stream=True,
max_tokens=10,
# The prompt should include JSON if 'json_object' is selected; otherwise, you will get error code 400.
response_format={ "type": "json_object" },
seed=123,
stop=["\n\n"],


@ -1,3 +1,6 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Infinity
| Property | Details |
@ -12,6 +15,9 @@
```python
from litellm import rerank
import os
os.environ["INFINITY_API_BASE"] = "http://localhost:8080"
response = rerank(
model="infinity/rerank",
@ -65,3 +71,114 @@ curl http://0.0.0.0:4000/rerank \
```
## Supported Cohere Rerank API Params
| Param | Type | Description |
|-------|-------|-------|
| `query` | `str` | The query to rerank the documents against |
| `documents` | `list[str]` | The documents to rerank |
| `top_n` | `int` | The number of documents to return |
| `return_documents` | `bool` | Whether to return the documents in the response |
### Usage - Return Documents
<Tabs>
<TabItem value="sdk" label="SDK">
```python
response = rerank(
model="infinity/rerank",
query="What is the capital of France?",
documents=["Paris", "London", "Berlin", "Madrid"],
return_documents=True,
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
```bash
curl http://0.0.0.0:4000/rerank \
-H "Authorization: Bearer sk-1234" \
-H "Content-Type: application/json" \
-d '{
"model": "custom-infinity-rerank",
"query": "What is the capital of France?",
"documents": [
"Paris",
"London",
"Berlin",
"Madrid"
],
"return_documents": True,
}'
```
</TabItem>
</Tabs>
## Pass Provider-specific Params
Any unmapped params will be passed to the provider as-is.
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import rerank
import os
os.environ["INFINITY_API_BASE"] = "http://localhost:8080"
response = rerank(
model="infinity/rerank",
query="What is the capital of France?",
documents=["Paris", "London", "Berlin", "Madrid"],
raw_scores=True, # 👈 PROVIDER-SPECIFIC PARAM
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Setup config.yaml
```yaml
model_list:
- model_name: custom-infinity-rerank
litellm_params:
model: infinity/rerank
api_base: https://localhost:8080
raw_scores: True # 👈 EITHER SET PROVIDER-SPECIFIC PARAMS HERE OR IN REQUEST BODY
```
2. Start litellm
```bash
litellm --config /path/to/config.yaml
# RUNNING on http://0.0.0.0:4000
```
3. Test it!
```bash
curl http://0.0.0.0:4000/rerank \
-H "Authorization: Bearer sk-1234" \
-H "Content-Type: application/json" \
-d '{
"model": "custom-infinity-rerank",
"query": "What is the capital of the United States?",
"documents": [
"Carson City is the capital city of the American state of Nevada.",
"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.",
"Washington, D.C. is the capital of the United States.",
"Capital punishment has existed in the United States since before it was a country."
],
"raw_scores": True # 👈 PROVIDER-SPECIFIC PARAM
}'
```
</TabItem>
</Tabs>


@ -3,13 +3,15 @@ import TabItem from '@theme/TabItem';
# LiteLLM Proxy (LLM Gateway)
:::tip
[LiteLLM provides a **self hosted** proxy server (AI Gateway)](../simple_proxy) to call all the LLMs in the OpenAI format
| Property | Details |
|-------|-------|
| Description | LiteLLM Proxy is an OpenAI-compatible gateway that allows you to interact with multiple LLM providers through a unified API. Simply use the `litellm_proxy/` prefix before the model name to route your requests through the proxy. |
| Provider Route on LiteLLM | `litellm_proxy/` (add this prefix to the model name, to route any requests to litellm_proxy - e.g. `litellm_proxy/your-model-name`) |
| Setup LiteLLM Gateway | [LiteLLM Gateway ↗](../simple_proxy) |
| Supported Endpoints |`/chat/completions`, `/completions`, `/embeddings`, `/audio/speech`, `/audio/transcriptions`, `/images`, `/rerank` |
:::
**[LiteLLM Proxy](../simple_proxy) is OpenAI compatible**, you just need the `litellm_proxy/` prefix before the model
## Required Variables
@ -83,7 +85,76 @@ for chunk in response:
print(chunk)
```
## Embeddings
```python
import litellm
response = litellm.embedding(
model="litellm_proxy/your-embedding-model",
input="Hello world",
api_base="your-litellm-proxy-url",
api_key="your-litellm-proxy-api-key"
)
```
## Image Generation
```python
import litellm
response = litellm.image_generation(
model="litellm_proxy/dall-e-3",
prompt="A beautiful sunset over mountains",
api_base="your-litellm-proxy-url",
api_key="your-litellm-proxy-api-key"
)
```
## Audio Transcription
```python
import litellm
response = litellm.transcription(
model="litellm_proxy/whisper-1",
file="your-audio-file",
api_base="your-litellm-proxy-url",
api_key="your-litellm-proxy-api-key"
)
```
## Text to Speech
```python
import litellm
response = litellm.speech(
model="litellm_proxy/tts-1",
input="Hello world",
api_base="your-litellm-proxy-url",
api_key="your-litellm-proxy-api-key"
)
```
## Rerank
```python
import litellm
response = litellm.rerank(
model="litellm_proxy/rerank-english-v2.0",
query="What is machine learning?",
documents=[
"Machine learning is a field of study in artificial intelligence",
"Biology is the study of living organisms"
],
api_base="your-litellm-proxy-url",
api_key="your-litellm-proxy-api-key"
)
```
## **Usage with Langchain, LLamaindex, OpenAI Js, Anthropic SDK, Instructor**
#### [Follow this doc to see how to use litellm proxy with langchain, llamaindex, anthropic etc](../proxy/user_keys)


@ -2,11 +2,11 @@ import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Sambanova
https://community.sambanova.ai/t/create-chat-completion-api/
https://cloud.sambanova.ai/
:::tip
**We support ALL Sambanova models, just set `model=sambanova/<any-model-on-sambanova>` as a prefix when sending litellm requests. For the complete supported model list, visit https://sambanova.ai/technology/models **
**We support ALL Sambanova models, just set `model=sambanova/<any-model-on-sambanova>` as a prefix when sending litellm requests. For the complete supported model list, visit https://docs.sambanova.ai/cloud/docs/get-started/supported-models **
:::
@ -27,12 +27,11 @@ response = completion(
messages=[
{
"role": "user",
"content": "What do you know about sambanova.ai",
"content": "What do you know about sambanova.ai. Give your response in json format",
}
],
max_tokens=10,
response_format={ "type": "json_object" },
seed=123,
stop=["\n\n"],
temperature=0.2,
top_p=0.9,
@ -54,13 +53,12 @@ response = completion(
messages=[
{
"role": "user",
"content": "What do you know about sambanova.ai",
"content": "What do you know about sambanova.ai. Give your response in json format",
}
],
stream=True,
max_tokens=10,
response_format={ "type": "json_object" },
seed=123,
stop=["\n\n"],
temperature=0.2,
top_p=0.9,


@ -852,6 +852,7 @@ litellm.vertex_location = "us-central1 # Your Location
| claude-3-5-sonnet@20240620 | `completion('vertex_ai/claude-3-5-sonnet@20240620', messages)` |
| claude-3-sonnet@20240229 | `completion('vertex_ai/claude-3-sonnet@20240229', messages)` |
| claude-3-haiku@20240307 | `completion('vertex_ai/claude-3-haiku@20240307', messages)` |
| claude-3-7-sonnet@20250219 | `completion('vertex_ai/claude-3-7-sonnet@20250219', messages)` |
### Usage
@ -926,6 +927,119 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
</Tabs>
### Usage - `thinking` / `reasoning_content`
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import completion
resp = completion(
model="vertex_ai/claude-3-7-sonnet-20250219",
messages=[{"role": "user", "content": "What is the capital of France?"}],
thinking={"type": "enabled", "budget_tokens": 1024},
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Setup config.yaml
```yaml
- model_name: claude-3-7-sonnet-20250219
litellm_params:
model: vertex_ai/claude-3-7-sonnet-20250219
vertex_ai_project: "my-test-project"
vertex_ai_location: "us-west-1"
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml
```
3. Test it!
```bash
curl http://0.0.0.0:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <YOUR-LITELLM-KEY>" \
-d '{
"model": "claude-3-7-sonnet-20250219",
"messages": [{"role": "user", "content": "What is the capital of France?"}],
"thinking": {"type": "enabled", "budget_tokens": 1024}
}'
```
</TabItem>
</Tabs>
**Expected Response**
```python
ModelResponse(
id='chatcmpl-c542d76d-f675-4e87-8e5f-05855f5d0f5e',
created=1740470510,
model='claude-3-7-sonnet-20250219',
object='chat.completion',
system_fingerprint=None,
choices=[
Choices(
finish_reason='stop',
index=0,
message=Message(
content="The capital of France is Paris.",
role='assistant',
tool_calls=None,
function_call=None,
provider_specific_fields={
'citations': None,
'thinking_blocks': [
{
'type': 'thinking',
'thinking': 'The capital of France is Paris. This is a very straightforward factual question.',
'signature': 'EuYBCkQYAiJAy6...'
}
]
}
),
thinking_blocks=[
{
'type': 'thinking',
'thinking': 'The capital of France is Paris. This is a very straightforward factual question.',
'signature': 'EuYBCkQYAiJAy6AGB...'
}
],
reasoning_content='The capital of France is Paris. This is a very straightforward factual question.'
)
],
usage=Usage(
completion_tokens=68,
prompt_tokens=42,
total_tokens=110,
completion_tokens_details=None,
prompt_tokens_details=PromptTokensDetailsWrapper(
audio_tokens=None,
cached_tokens=0,
text_tokens=None,
image_tokens=None
),
cache_creation_input_tokens=0,
cache_read_input_tokens=0
)
)
```
## Llama 3 API
| Model Name | Function Call |


@ -157,6 +157,98 @@ curl -L -X POST 'http://0.0.0.0:4000/embeddings' \
</TabItem>
</Tabs>
## Send Video URL to VLLM
Example Implementation from VLLM [here](https://github.com/vllm-project/vllm/pull/10020)
There are two ways to send a video url to VLLM:
1. Pass the video url directly
```
{"type": "video_url", "video_url": {"url": video_url}},
```
2. Pass the video data as base64
```
{"type": "video_url", "video_url": {"url": f"data:video/mp4;base64,{video_data_base64}"}}
```
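As a rough sketch of option 2 (base64), assuming a local `video.mp4` and reusing the same `hosted_vllm/qwen` model and `api_base` from the example below:

```python
import base64

from litellm import completion

# read and base64-encode a local video file (hypothetical path)
with open("video.mp4", "rb") as f:
    video_data_base64 = base64.b64encode(f.read()).decode("utf-8")

response = completion(
    model="hosted_vllm/qwen",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Summarize the following video"},
                {
                    "type": "video_url",
                    "video_url": {"url": f"data:video/mp4;base64,{video_data_base64}"},
                },
            ],
        }
    ],
    api_base="https://hosted-vllm-api.co",
)
print(response)
```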
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import completion
response = completion(
model="hosted_vllm/qwen", # pass the vllm model name
messages=[
{
"role": "user",
"content": [
{
"type": "text",
"text": "Summarize the following video"
},
{
"type": "video_url",
"video_url": {
"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
}
}
]
}
],
api_base="https://hosted-vllm-api.co")
print(response)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Setup config.yaml
```yaml
model_list:
- model_name: my-model
litellm_params:
model: hosted_vllm/qwen # add hosted_vllm/ prefix to route as OpenAI provider
api_base: https://hosted-vllm-api.co # add api base for OpenAI compatible provider
```
2. Start the proxy
```bash
$ litellm --config /path/to/config.yaml
# RUNNING on http://0.0.0.0:4000
```
3. Test it!
```bash
curl -X POST http://0.0.0.0:4000/chat/completions \
-H "Authorization: Bearer sk-1234" \
-H "Content-Type: application/json" \
-d '{
"model": "my-model",
"messages": [
{"role": "user", "content":
[
{"type": "text", "text": "Summarize the following video"},
{"type": "video_url", "video_url": {"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"}}
]
}
]
}'
```
</TabItem>
</Tabs>
## (Deprecated) for `vllm pip package`
### Using - `litellm.completion`


@ -36,7 +36,7 @@ import TabItem from '@theme/TabItem';
- Virtual Key Rate Limit
- User Rate Limit
- Team Limit
- The `_PROXY_track_cost_callback` updates spend / usage in the LiteLLM database. [Here is everything tracked in the DB per request](https://github.com/BerriAI/litellm/blob/ba41a72f92a9abf1d659a87ec880e8e319f87481/schema.prisma#L172)
- The `_ProxyDBLogger` updates spend / usage in the LiteLLM database. [Here is everything tracked in the DB per request](https://github.com/BerriAI/litellm/blob/ba41a72f92a9abf1d659a87ec880e8e319f87481/schema.prisma#L172)
## Frequently Asked Questions


@ -0,0 +1,12 @@
# Release Cycle
LiteLLM Proxy follows this release cycle:
- `v1.x.x-nightly`: Releases that pass CI/CD.
- `v1.x.x.rc`: Releases that pass CI/CD + [manual review](https://github.com/BerriAI/litellm/discussions/8495#discussioncomment-12180711).
- `v1.x.x`: Releases that pass CI/CD + manual review + 3 days of production testing.
In production, we recommend using the latest `v1.x.x` release.
Follow our release notes [here](https://github.com/BerriAI/litellm/releases).


@ -0,0 +1,357 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# 'Thinking' / 'Reasoning Content'
Supported Providers:
- Deepseek (`deepseek/`)
- Anthropic API (`anthropic/`)
- Bedrock (Anthropic + Deepseek) (`bedrock/`)
- Vertex AI (Anthropic) (`vertexai/`)
```python
"message": {
...
"reasoning_content": "The capital of France is Paris.",
"thinking_blocks": [
{
"type": "thinking",
"thinking": "The capital of France is Paris.",
"signature_delta": "EqoBCkgIARABGAIiQL2UoU0b1OHYi+..."
}
]
}
```
## Quick Start
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import completion
import os
os.environ["ANTHROPIC_API_KEY"] = ""
response = completion(
model="anthropic/claude-3-7-sonnet-20250219",
messages=[
{"role": "user", "content": "What is the capital of France?"},
],
thinking={"type": "enabled", "budget_tokens": 1024} # 👈 REQUIRED FOR ANTHROPIC models (on `anthropic/`, `bedrock/`, `vertexai/`)
)
print(response.choices[0].message.content)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
```bash
curl http://0.0.0.0:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $LITELLM_KEY" \
-d '{
"model": "anthropic/claude-3-7-sonnet-20250219",
"messages": [
{
"role": "user",
"content": "What is the capital of France?"
}
],
"thinking": {"type": "enabled", "budget_tokens": 1024}
}'
```
</TabItem>
</Tabs>
**Expected Response**
```bash
{
"id": "3b66124d79a708e10c603496b363574c",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": " won the FIFA World Cup in 2022.",
"role": "assistant",
"tool_calls": null,
"function_call": null
}
}
],
"created": 1723323084,
"model": "deepseek/deepseek-chat",
"object": "chat.completion",
"system_fingerprint": "fp_7e0991cad4",
"usage": {
"completion_tokens": 12,
"prompt_tokens": 16,
"total_tokens": 28,
},
"service_tier": null
}
```
## Tool Calling with `thinking`
Here's how to use `thinking` blocks by Anthropic with tool calling.
<Tabs>
<TabItem value="sdk" label="SDK">
```python
import json

import litellm


# NOTE: example helper (not part of litellm) - returns a canned weather payload
def get_current_weather(location, unit="fahrenheit"):
    return json.dumps({"location": location, "temperature": "72", "unit": unit})


litellm._turn_on_debug()
litellm.modify_params = True
model = "anthropic/claude-3-7-sonnet-20250219" # works across Anthropic, Bedrock, Vertex AI
# Step 1: send the conversation and available functions to the model
messages = [
{
"role": "user",
"content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses",
}
]
tools = [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state",
},
"unit": {
"type": "string",
"enum": ["celsius", "fahrenheit"],
},
},
"required": ["location"],
},
},
}
]
response = litellm.completion(
model=model,
messages=messages,
tools=tools,
tool_choice="auto", # auto is default, but we'll be explicit
thinking={"type": "enabled", "budget_tokens": 1024},
)
print("Response\n", response)
response_message = response.choices[0].message
tool_calls = response_message.tool_calls
print("Expecting there to be 3 tool calls")
assert (
len(tool_calls) > 0
) # this has to call the function for SF, Tokyo and paris
# Step 2: check if the model wanted to call a function
print(f"tool_calls: {tool_calls}")
if tool_calls:
# Step 3: call the function
# Note: the JSON response may not always be valid; be sure to handle errors
available_functions = {
"get_current_weather": get_current_weather,
} # only one function in this example, but you can have multiple
messages.append(
response_message
) # extend conversation with assistant's reply
print("Response message\n", response_message)
# Step 4: send the info for each function call and function response to the model
for tool_call in tool_calls:
function_name = tool_call.function.name
if function_name not in available_functions:
# the model called a function that does not exist in available_functions - don't try calling anything
continue  # skip this tool call (a bare `return` is not valid at module level)
function_to_call = available_functions[function_name]
function_args = json.loads(tool_call.function.arguments)
function_response = function_to_call(
location=function_args.get("location"),
unit=function_args.get("unit"),
)
messages.append(
{
"tool_call_id": tool_call.id,
"role": "tool",
"name": function_name,
"content": function_response,
}
) # extend conversation with function response
print(f"messages: {messages}")
second_response = litellm.completion(
model=model,
messages=messages,
seed=22,
# tools=tools,
drop_params=True,
thinking={"type": "enabled", "budget_tokens": 1024},
) # get a new response from the model where it can see the function response
print("second response\n", second_response)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Setup config.yaml
```yaml
model_list:
- model_name: claude-3-7-sonnet-thinking
litellm_params:
model: anthropic/claude-3-7-sonnet-20250219
api_key: os.environ/ANTHROPIC_API_KEY
thinking: {
"type": "enabled",
"budget_tokens": 1024
}
```
2. Run proxy
```bash
litellm --config config.yaml
# RUNNING on http://0.0.0.0:4000
```
3. Make 1st call
```bash
curl http://0.0.0.0:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $LITELLM_KEY" \
-d '{
"model": "claude-3-7-sonnet-thinking",
"messages": [
{"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"},
],
"tools": [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state",
},
"unit": {
"type": "string",
"enum": ["celsius", "fahrenheit"],
},
},
"required": ["location"],
},
},
}
],
"tool_choice": "auto"
}'
```
4. Make 2nd call with tool call results
```bash
curl http://0.0.0.0:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $LITELLM_KEY" \
-d '{
"model": "claude-3-7-sonnet-thinking",
"messages": [
{
"role": "user",
"content": "What\'s the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"
},
{
"role": "assistant",
"content": "I\'ll check the current weather for these three cities for you:",
"tool_calls": [
{
"index": 2,
"function": {
"arguments": "{\"location\": \"San Francisco\"}",
"name": "get_current_weather"
},
"id": "tooluse_mnqzmtWYRjCxUInuAdK7-w",
"type": "function"
}
],
"function_call": null,
"reasoning_content": "The user is asking for the current weather in three different locations: San Francisco, Tokyo, and Paris. I have access to the `get_current_weather` function that can provide this information.\n\nThe function requires a `location` parameter, and has an optional `unit` parameter. The user hasn't specified which unit they prefer (celsius or fahrenheit), so I'll use the default provided by the function.\n\nI need to make three separate function calls, one for each location:\n1. San Francisco\n2. Tokyo\n3. Paris\n\nThen I'll compile the results into a response with three distinct weather reports as requested by the user.",
"thinking_blocks": [
{
"type": "thinking",
"thinking": "The user is asking for the current weather in three different locations: San Francisco, Tokyo, and Paris. I have access to the `get_current_weather` function that can provide this information.\n\nThe function requires a `location` parameter, and has an optional `unit` parameter. The user hasn't specified which unit they prefer (celsius or fahrenheit), so I'll use the default provided by the function.\n\nI need to make three separate function calls, one for each location:\n1. San Francisco\n2. Tokyo\n3. Paris\n\nThen I'll compile the results into a response with three distinct weather reports as requested by the user.",
"signature_delta": "EqoBCkgIARABGAIiQCkBXENoyB+HstUOs/iGjG+bvDbIQRrxPsPpOSt5yDxX6iulZ/4K/w9Rt4J5Nb2+3XUYsyOH+CpZMfADYvItFR4SDPb7CmzoGKoolCMAJRoM62p1ZRASZhrD3swqIjAVY7vOAFWKZyPEJglfX/60+bJphN9W1wXR6rWrqn3MwUbQ5Mb/pnpeb10HMploRgUqEGKOd6fRKTkUoNDuAnPb55c="
}
],
"provider_specific_fields": {
"reasoningContentBlocks": [
{
"reasoningText": {
"signature": "EqoBCkgIARABGAIiQCkBXENoyB+HstUOs/iGjG+bvDbIQRrxPsPpOSt5yDxX6iulZ/4K/w9Rt4J5Nb2+3XUYsyOH+CpZMfADYvItFR4SDPb7CmzoGKoolCMAJRoM62p1ZRASZhrD3swqIjAVY7vOAFWKZyPEJglfX/60+bJphN9W1wXR6rWrqn3MwUbQ5Mb/pnpeb10HMploRgUqEGKOd6fRKTkUoNDuAnPb55c=",
"text": "The user is asking for the current weather in three different locations: San Francisco, Tokyo, and Paris. I have access to the `get_current_weather` function that can provide this information.\n\nThe function requires a `location` parameter, and has an optional `unit` parameter. The user hasn't specified which unit they prefer (celsius or fahrenheit), so I'll use the default provided by the function.\n\nI need to make three separate function calls, one for each location:\n1. San Francisco\n2. Tokyo\n3. Paris\n\nThen I'll compile the results into a response with three distinct weather reports as requested by the user."
}
}
]
}
},
{
"tool_call_id": "tooluse_mnqzmtWYRjCxUInuAdK7-w",
"role": "tool",
"name": "get_current_weather",
"content": "{\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": \"fahrenheit\"}"
}
]
}'
```
</TabItem>
</Tabs>
## Switching between Anthropic + Deepseek models
Set `drop_params=True` to drop the 'thinking' blocks when swapping from Anthropic to Deepseek models. Suggest improvements to this approach [here](https://github.com/BerriAI/litellm/discussions/8927).
```python
litellm.drop_params = True # 👈 EITHER GLOBALLY or per request
# or per request
## Anthropic
response = litellm.completion(
model="anthropic/claude-3-7-sonnet-20250219",
messages=[{"role": "user", "content": "What is the capital of France?"}],
thinking={"type": "enabled", "budget_tokens": 1024},
drop_params=True,
)
## Deepseek
response = litellm.completion(
model="deepseek/deepseek-chat",
messages=[{"role": "user", "content": "What is the capital of France?"}],
thinking={"type": "enabled", "budget_tokens": 1024},
drop_params=True,
)
```
## Spec
These fields can be accessed via `response.choices[0].message.reasoning_content` and `response.choices[0].message.thinking_blocks`.
- `reasoning_content` - str: The reasoning content from the model. Returned across all providers.
- `thinking_blocks` - Optional[List[Dict[str, str]]]: A list of thinking blocks from the model. Only returned for Anthropic models.
- `type` - str: The type of thinking block.
- `thinking` - str: The thinking from the model.
- `signature_delta` - str: The signature delta from the model.
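A defensive-access sketch based on the spec above (Anthropic model name reused from the Quick Start; assumes `ANTHROPIC_API_KEY` is set):

```python
from litellm import completion

response = completion(
    model="anthropic/claude-3-7-sonnet-20250219",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    thinking={"type": "enabled", "budget_tokens": 1024},
)

message = response.choices[0].message
print(message.reasoning_content)    # returned across supported providers
if message.thinking_blocks:         # Anthropic-only, so it may be None elsewhere
    for block in message.thinking_blocks:
        print(block["type"], len(block["thinking"]), "chars of thinking")
```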


@ -18,13 +18,6 @@ hide_table_of_contents: false
`alerting`, `prometheus`, `secret management`, `management endpoints`, `ui`, `prompt management`, `finetuning`, `batch`
:::note
v1.57.8-stable, is currently being tested. It will be released on 2025-01-12.
:::
## New / Updated Models
1. Mistral large pricing - https://github.com/BerriAI/litellm/pull/7452


@ -0,0 +1,109 @@
---
title: v1.61.20-stable
slug: v1.61.20-stable
date: 2025-03-01T10:00:00
authors:
- name: Krrish Dholakia
title: CEO, LiteLLM
url: https://www.linkedin.com/in/krish-d/
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
- name: Ishaan Jaffer
title: CTO, LiteLLM
url: https://www.linkedin.com/in/reffajnaahsi/
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGiM7ZrUwqu_Q/profile-displayphoto-shrink_800_800/profile-displayphoto-shrink_800_800/0/1675971026692?e=1741824000&v=beta&t=eQnRdXPJo4eiINWTZARoYTfqh064pgZ-E21pQTSy8jc
tags: [llm translation, rerank, ui, thinking, reasoning_content, claude-3-7-sonnet]
hide_table_of_contents: false
---
import Image from '@theme/IdealImage';
# v1.61.20-stable
:::info
`v1.61.20-stable` will be live on 2025-02-04.
:::
These are the changes since `v1.61.13-stable`.
This release is primarily focused on:
- LLM Translation improvements (claude-3-7-sonnet + 'thinking'/'reasoning_content' support)
- UI improvements (add model flow, user management, etc)
## Demo Instance
Here's a Demo Instance to test changes:
- Instance: https://demo.litellm.ai/
- Login Credentials:
- Username: admin
- Password: sk-1234
## New Models / Updated Models
1. Anthropic Claude 3.7 Sonnet support + cost tracking (Anthropic API + Bedrock + Vertex AI + OpenRouter)
1. Anthropic API [Start here](https://docs.litellm.ai/docs/providers/anthropic#usage---thinking--reasoning_content)
2. Bedrock API [Start here](https://docs.litellm.ai/docs/providers/bedrock#usage---thinking--reasoning-content)
3. Vertex AI API [See here](../../docs/providers/vertex#usage---thinking--reasoning_content)
4. OpenRouter [See here](https://github.com/BerriAI/litellm/blob/ba5bdce50a0b9bc822de58c03940354f19a733ed/model_prices_and_context_window.json#L5626)
2. Gpt-4.5-preview support + cost tracking [See here](https://github.com/BerriAI/litellm/blob/ba5bdce50a0b9bc822de58c03940354f19a733ed/model_prices_and_context_window.json#L79)
3. Azure AI - Phi-4 cost tracking [See here](https://github.com/BerriAI/litellm/blob/ba5bdce50a0b9bc822de58c03940354f19a733ed/model_prices_and_context_window.json#L1773)
4. Claude-3.5-sonnet - vision support updated on Anthropic API [See here](https://github.com/BerriAI/litellm/blob/ba5bdce50a0b9bc822de58c03940354f19a733ed/model_prices_and_context_window.json#L2888)
5. Bedrock llama vision support [See here](https://github.com/BerriAI/litellm/blob/ba5bdce50a0b9bc822de58c03940354f19a733ed/model_prices_and_context_window.json#L7714)
6. Cerebras llama3.3-70b pricing [See here](https://github.com/BerriAI/litellm/blob/ba5bdce50a0b9bc822de58c03940354f19a733ed/model_prices_and_context_window.json#L2697)
## LLM Translation
1. Infinity Rerank - support returning documents when return_documents=True [Start here](../../docs/providers/infinity#usage---returning-documents)
2. Amazon Deepseek - `<think>` param extraction into reasoning_content [Start here](https://docs.litellm.ai/docs/providers/bedrock#bedrock-imported-models-deepseek-deepseek-r1)
3. Amazon Titan Embeddings - filter out aws_ params from request body [Start here](https://docs.litellm.ai/docs/providers/bedrock#bedrock-embedding)
4. Anthropic thinking + reasoning_content translation support (Anthropic API, Bedrock, Vertex AI) [Start here](https://docs.litellm.ai/docs/reasoning_content)
5. VLLM - support video_url [Start here](../../docs/providers/vllm#send-video-url-to-vllm)
6. Call proxy via litellm SDK: Support `litellm_proxy/` for embedding, image_generation, transcription, speech, rerank [Start here](https://docs.litellm.ai/docs/providers/litellm_proxy)
7. OpenAI Pass-through - allow using Assistants GET, DELETE on /openai pass through routes [Start here](https://docs.litellm.ai/docs/pass_through/openai_passthrough)
8. Message Translation - fix openai message for assistant msg if role is missing - openai allows this
9. O1/O3 - support drop_params for o3-mini and o1 parallel_tool_calls param (not supported currently) [See here](https://docs.litellm.ai/docs/completion/drop_params)
## Spend Tracking Improvements
1. Cost tracking for rerank via Bedrock [See PR](https://github.com/BerriAI/litellm/commit/b682dc4ec8fd07acf2f4c981d2721e36ae2a49c5)
2. Anthropic pass-through - fix race condition causing cost to not be tracked [See PR](https://github.com/BerriAI/litellm/pull/8874)
3. Anthropic pass-through: Ensure accurate token counting [See PR](https://github.com/BerriAI/litellm/pull/8880)
## Management Endpoints / UI
1. Models Page - Allow sorting models by created at
2. Models Page - Edit Model Flow Improvements
3. Models Page - Fix Adding Azure, Azure AI Studio models on UI
4. Internal Users Page - Allow Bulk Adding Internal Users on UI
5. Internal Users Page - Allow sorting users by created at
6. Virtual Keys Page - Allow searching for UserIDs on the dropdown when assigning a user to a team [See PR](https://github.com/BerriAI/litellm/pull/8844)
7. Virtual Keys Page - allow creating a user when assigning keys to users [See PR](https://github.com/BerriAI/litellm/pull/8844)
8. Model Hub Page - fix text overflow issue [See PR](https://github.com/BerriAI/litellm/pull/8749)
9. Admin Settings Page - Allow adding MSFT SSO on UI
10. Backend - don't allow creating duplicate internal users in DB
## Helm
1. support ttlSecondsAfterFinished on the migration job - [See PR](https://github.com/BerriAI/litellm/pull/8593)
2. enhance migrations job with additional configurable properties - [See PR](https://github.com/BerriAI/litellm/pull/8636)
## Logging / Guardrail Integrations
1. Arize Phoenix support
2. No-log - fix no-log param support on embedding calls
## Performance / Loadbalancing / Reliability improvements
1. Single Deployment Cooldown logic - Use allowed_fails or allowed_fail_policy if set [Start here](https://docs.litellm.ai/docs/routing#advanced-custom-retries-cooldowns-based-on-error-type)
## General Proxy Improvements
1. Hypercorn - fix reading / parsing request body
2. Windows - fix running proxy in windows
3. DD-Trace - fix dd-trace enablement on proxy
## Complete Git Diff
View the complete git diff [here](https://github.com/BerriAI/litellm/compare/v1.61.13-stable...v1.61.20-stable).


@ -41,6 +41,7 @@ const sidebars = {
"proxy/deploy",
"proxy/prod",
"proxy/cli",
"proxy/release_cycle",
"proxy/model_management",
"proxy/health",
"proxy/debugging",
@ -242,6 +243,7 @@ const sidebars = {
"completion/document_understanding",
"completion/vision",
"completion/json_mode",
"reasoning_content",
"completion/prompt_caching",
"completion/predict_outputs",
"completion/prefix",
@ -303,6 +305,7 @@ const sidebars = {
"pass_through/vertex_ai",
"pass_through/google_ai_studio",
"pass_through/cohere",
"pass_through/openai_passthrough",
"pass_through/anthropic_completion",
"pass_through/bedrock",
"pass_through/assembly_ai",


@ -53,6 +53,7 @@ from litellm.constants import (
cohere_embedding_models,
bedrock_embedding_models,
known_tokenizer_config,
BEDROCK_INVOKE_PROVIDERS_LITERAL,
)
from litellm.types.guardrails import GuardrailItem
from litellm.proxy._types import (
@ -361,17 +362,7 @@ BEDROCK_CONVERSE_MODELS = [
"meta.llama3-2-11b-instruct-v1:0",
"meta.llama3-2-90b-instruct-v1:0",
]
BEDROCK_INVOKE_PROVIDERS_LITERAL = Literal[
"cohere",
"anthropic",
"mistral",
"amazon",
"meta",
"llama",
"ai21",
"nova",
"deepseek_r1",
]
####### COMPLETION MODELS ###################
open_ai_chat_completion_models: List = []
open_ai_text_completion_models: List = []
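A small sketch (not part of this diff) of using the constant after this move: `BEDROCK_INVOKE_PROVIDERS_LITERAL` is now imported from `litellm.constants`, and `typing.get_args` enumerates the allowed provider strings.

```python
from typing import get_args

from litellm.constants import BEDROCK_INVOKE_PROVIDERS_LITERAL


def is_bedrock_invoke_provider(provider: str) -> bool:
    # membership check against the allowed names ("cohere", "anthropic", "mistral", ...)
    return provider in get_args(BEDROCK_INVOKE_PROVIDERS_LITERAL)


print(is_bedrock_invoke_provider("anthropic"))  # True
print(is_bedrock_invoke_provider("openai"))     # False
```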


@ -13,26 +13,14 @@ import json
import time
import traceback
from enum import Enum
from typing import Any, Dict, List, Optional, Set, Union
from typing import Any, Dict, List, Optional, Union
from openai.types.audio.transcription_create_params import TranscriptionCreateParams
from openai.types.chat.completion_create_params import (
CompletionCreateParamsNonStreaming,
CompletionCreateParamsStreaming,
)
from openai.types.completion_create_params import (
CompletionCreateParamsNonStreaming as TextCompletionCreateParamsNonStreaming,
)
from openai.types.completion_create_params import (
CompletionCreateParamsStreaming as TextCompletionCreateParamsStreaming,
)
from openai.types.embedding_create_params import EmbeddingCreateParams
from pydantic import BaseModel
import litellm
from litellm._logging import verbose_logger
from litellm.litellm_core_utils.model_param_helper import ModelParamHelper
from litellm.types.caching import *
from litellm.types.rerank import RerankRequest
from litellm.types.utils import all_litellm_params
from .base_cache import BaseCache
@ -257,7 +245,7 @@ class Cache:
verbose_logger.debug("\nReturning preset cache key: %s", preset_cache_key)
return preset_cache_key
combined_kwargs = self._get_relevant_args_to_use_for_cache_key()
combined_kwargs = ModelParamHelper._get_all_llm_api_params()
litellm_param_kwargs = all_litellm_params
for param in kwargs:
if param in combined_kwargs:
@ -364,76 +352,6 @@ class Cache:
if "litellm_params" in kwargs:
kwargs["litellm_params"]["preset_cache_key"] = preset_cache_key
def _get_relevant_args_to_use_for_cache_key(self) -> Set[str]:
"""
Gets the supported kwargs for each call type and combines them
"""
chat_completion_kwargs = self._get_litellm_supported_chat_completion_kwargs()
text_completion_kwargs = self._get_litellm_supported_text_completion_kwargs()
embedding_kwargs = self._get_litellm_supported_embedding_kwargs()
transcription_kwargs = self._get_litellm_supported_transcription_kwargs()
rerank_kwargs = self._get_litellm_supported_rerank_kwargs()
exclude_kwargs = self._get_kwargs_to_exclude_from_cache_key()
combined_kwargs = chat_completion_kwargs.union(
text_completion_kwargs,
embedding_kwargs,
transcription_kwargs,
rerank_kwargs,
)
combined_kwargs = combined_kwargs.difference(exclude_kwargs)
return combined_kwargs
def _get_litellm_supported_chat_completion_kwargs(self) -> Set[str]:
"""
Get the litellm supported chat completion kwargs
This follows the OpenAI API Spec
"""
all_chat_completion_kwargs = set(
CompletionCreateParamsNonStreaming.__annotations__.keys()
).union(set(CompletionCreateParamsStreaming.__annotations__.keys()))
return all_chat_completion_kwargs
def _get_litellm_supported_text_completion_kwargs(self) -> Set[str]:
"""
Get the litellm supported text completion kwargs
This follows the OpenAI API Spec
"""
all_text_completion_kwargs = set(
TextCompletionCreateParamsNonStreaming.__annotations__.keys()
).union(set(TextCompletionCreateParamsStreaming.__annotations__.keys()))
return all_text_completion_kwargs
def _get_litellm_supported_rerank_kwargs(self) -> Set[str]:
"""
Get the litellm supported rerank kwargs
"""
return set(RerankRequest.model_fields.keys())
def _get_litellm_supported_embedding_kwargs(self) -> Set[str]:
"""
Get the litellm supported embedding kwargs
This follows the OpenAI API Spec
"""
return set(EmbeddingCreateParams.__annotations__.keys())
def _get_litellm_supported_transcription_kwargs(self) -> Set[str]:
"""
Get the litellm supported transcription kwargs
This follows the OpenAI API Spec
"""
return set(TranscriptionCreateParams.__annotations__.keys())
def _get_kwargs_to_exclude_from_cache_key(self) -> Set[str]:
"""
Get the kwargs to exclude from the cache key
"""
return set(["metadata"])
@staticmethod
def _get_hashed_cache_key(cache_key: str) -> str:
"""

View file

@ -1,4 +1,4 @@
from typing import List
from typing import List, Literal
ROUTER_MAX_FALLBACKS = 5
DEFAULT_BATCH_SIZE = 512
@ -120,6 +120,7 @@ OPENAI_CHAT_COMPLETION_PARAMS = [
"top_logprobs",
"reasoning_effort",
"extra_headers",
"thinking",
]
openai_compatible_endpoints: List = [
@ -319,6 +320,17 @@ baseten_models: List = [
"31dxrj3",
] # FALCON 7B # WizardLM # Mosaic ML
BEDROCK_INVOKE_PROVIDERS_LITERAL = Literal[
"cohere",
"anthropic",
"mistral",
"amazon",
"meta",
"llama",
"ai21",
"nova",
"deepseek_r1",
]
open_ai_embedding_models: List = ["text-embedding-ada-002"]
cohere_embedding_models: List = [


@ -577,6 +577,4 @@ class DataDogLogger(
start_time_utc: Optional[datetimeObj],
end_time_utc: Optional[datetimeObj],
) -> Optional[dict]:
raise NotImplementedError(
"Datdog Integration for getting request/response payloads not implemented as yet"
)
pass


@ -5,17 +5,19 @@ If the ddtrace package is not installed, the tracer will be a no-op.
"""
from contextlib import contextmanager
from typing import TYPE_CHECKING, Any, Union
try:
from ddtrace import tracer as dd_tracer
from litellm.secret_managers.main import get_secret_bool
has_ddtrace = True
except ImportError:
has_ddtrace = False
if TYPE_CHECKING:
from ddtrace.tracer import Tracer as DD_TRACER
else:
DD_TRACER = Any
class NullSpan:
"""A no-op span implementation."""
@contextmanager
def null_tracer(name, **kwargs):
class NullSpan:
def __enter__(self):
return self
@ -25,29 +27,47 @@ except ImportError:
def finish(self):
pass
@contextmanager
def null_tracer(name, **kwargs):
"""Context manager that yields a no-op span."""
yield NullSpan()
class NullTracer:
class NullTracer:
"""A no-op tracer implementation."""
def trace(self, name, **kwargs):
class NullSpan:
def __enter__(self):
return self
def __exit__(self, *args):
pass
def finish(self):
pass
return NullSpan()
def wrap(self, name=None, **kwargs):
# If called with no arguments (as @tracer.wrap())
if callable(name):
return name
# If called with arguments (as @tracer.wrap(name="something"))
def decorator(f):
return f
return decorator
dd_tracer = NullTracer()
# Export the tracer instance
tracer = dd_tracer
def _should_use_dd_tracer():
"""Returns True if `USE_DDTRACE` is set to True in .env"""
return get_secret_bool("USE_DDTRACE", False) is True
# Initialize tracer
should_use_dd_tracer = _should_use_dd_tracer()
tracer: Union[NullTracer, DD_TRACER] = NullTracer()
# We need to ensure tracer is never None and always has the required methods
if should_use_dd_tracer:
try:
from ddtrace import tracer as dd_tracer
# Define the type to match what's expected by the code using this module
tracer = dd_tracer
except ImportError:
tracer = NullTracer()
else:
tracer = NullTracer()
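A minimal usage sketch, assuming this module is importable as `litellm.litellm_core_utils.dd_tracing` (the path isn't shown in this diff): call sites look the same whether `USE_DDTRACE=True` activates the real ddtrace tracer or the `NullTracer` fallback is in effect.

```python
from litellm.litellm_core_utils.dd_tracing import tracer  # assumed module path


@tracer.wrap()  # no-op decorator when ddtrace is unavailable or disabled
def handle_request() -> str:
    with tracer.trace("downstream_call"):  # no-op span in the fallback case
        return "ok"


print(handle_request())
```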


@ -278,6 +278,7 @@ def exception_type( # type: ignore # noqa: PLR0915
"This model's maximum context length is" in error_str
or "string too long. Expected a string with maximum length"
in error_str
or "model's maximum context limit" in error_str
):
exception_mapping_worked = True
raise ContextWindowExceededError(
@ -692,6 +693,13 @@ def exception_type( # type: ignore # noqa: PLR0915
response=getattr(original_exception, "response", None),
litellm_debug_info=extra_information,
)
elif "model's maximum context limit" in error_str:
exception_mapping_worked = True
raise ContextWindowExceededError(
message=f"{custom_llm_provider}Exception: Context Window Error - {error_str}",
model=model,
llm_provider=custom_llm_provider,
)
elif "token_quota_reached" in error_str:
exception_mapping_worked = True
raise RateLimitError(
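A hedged sketch of what this new mapping enables for callers (the model name is illustrative; `ContextWindowExceededError` is the exception raised in the branches above):

```python
from litellm import completion
from litellm.exceptions import ContextWindowExceededError

try:
    completion(
        model="gpt-4o-mini",  # any configured model; illustrative only
        messages=[{"role": "user", "content": "a very long prompt " * 100000}],
    )
except ContextWindowExceededError as e:
    # trim / chunk the prompt, or route to a larger-context model
    print("context window exceeded:", e)
```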


@ -75,7 +75,7 @@ def get_litellm_params(
"model_info": model_info,
"proxy_server_request": proxy_server_request,
"preset_cache_key": preset_cache_key,
"no-log": no_log,
"no-log": no_log or kwargs.get("no-log"),
"stream_response": {}, # litellm_call_id: ModelResponse Dict
"input_cost_per_token": input_cost_per_token,
"input_cost_per_second": input_cost_per_second,


@ -3,7 +3,6 @@
# Logging function -> log the exact model details + what's being sent | Non-Blocking
import copy
import datetime
from functools import lru_cache
import json
import os
import re
@ -13,6 +12,7 @@ import time
import traceback
import uuid
from datetime import datetime as dt_object
from functools import lru_cache
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union, cast
from pydantic import BaseModel
@ -33,6 +33,7 @@ from litellm.integrations.custom_logger import CustomLogger
from litellm.integrations.mlflow import MlflowLogger
from litellm.integrations.pagerduty.pagerduty import PagerDutyAlerting
from litellm.litellm_core_utils.get_litellm_params import get_litellm_params
from litellm.litellm_core_utils.model_param_helper import ModelParamHelper
from litellm.litellm_core_utils.redact_messages import (
redact_message_input_output_from_custom_logger,
redact_message_input_output_from_logging,
@ -2513,7 +2514,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
# auth can be disabled on local deployments of arize phoenix
if arize_phoenix_config.otlp_auth_headers is not None:
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = arize_phoenix_config.otlp_auth_headers
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
arize_phoenix_config.otlp_auth_headers
)
for callback in _in_memory_loggers:
if (
@ -2521,7 +2524,9 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
and callback.callback_name == "arize_phoenix"
):
return callback # type: ignore
_otel_logger = OpenTelemetry(config=otel_config, callback_name="arize_phoenix")
_otel_logger = OpenTelemetry(
config=otel_config, callback_name="arize_phoenix"
)
_in_memory_loggers.append(_otel_logger)
return _otel_logger # type: ignore
elif logging_integration == "otel":
@ -3110,10 +3115,26 @@ class StandardLoggingPayloadSetup:
str(original_exception.__class__.__name__) if original_exception else ""
)
_llm_provider_in_exception = getattr(original_exception, "llm_provider", "")
# Get traceback information (first 100 lines)
traceback_info = ""
if original_exception:
tb = getattr(original_exception, "__traceback__", None)
if tb:
import traceback
tb_lines = traceback.format_tb(tb)
traceback_info = "".join(tb_lines[:100]) # Limit to first 100 lines
# Get additional error details
error_message = str(original_exception)
return StandardLoggingPayloadErrorInformation(
error_code=error_status,
error_class=error_class,
llm_provider=_llm_provider_in_exception,
traceback=traceback_info,
error_message=error_message if original_exception else "",
)
@staticmethod
@ -3310,7 +3331,9 @@ def get_standard_logging_object_payload(
requester_ip_address=clean_metadata.get("requester_ip_address", None),
messages=kwargs.get("messages"),
response=final_response_obj,
model_parameters=kwargs.get("optional_params", None),
model_parameters=ModelParamHelper.get_standard_logging_model_parameters(
kwargs.get("optional_params", None) or {}
),
hidden_params=clean_hidden_params,
model_map_information=model_cost_information,
error_str=error_str,


@ -473,6 +473,7 @@ def convert_to_model_response_object( # noqa: PLR0915
tool_calls=tool_calls,
audio=choice["message"].get("audio", None),
provider_specific_fields=provider_specific_fields,
reasoning_content=reasoning_content,
)
finish_reason = choice.get("finish_reason", None)
if finish_reason is None:


@ -0,0 +1,133 @@
from typing import Set
from openai.types.audio.transcription_create_params import TranscriptionCreateParams
from openai.types.chat.completion_create_params import (
CompletionCreateParamsNonStreaming,
CompletionCreateParamsStreaming,
)
from openai.types.completion_create_params import (
CompletionCreateParamsNonStreaming as TextCompletionCreateParamsNonStreaming,
)
from openai.types.completion_create_params import (
CompletionCreateParamsStreaming as TextCompletionCreateParamsStreaming,
)
from openai.types.embedding_create_params import EmbeddingCreateParams
from litellm.types.rerank import RerankRequest
class ModelParamHelper:
@staticmethod
def get_standard_logging_model_parameters(
model_parameters: dict,
) -> dict:
""" """
standard_logging_model_parameters: dict = {}
supported_model_parameters = (
ModelParamHelper._get_relevant_args_to_use_for_logging()
)
for key, value in model_parameters.items():
if key in supported_model_parameters:
standard_logging_model_parameters[key] = value
return standard_logging_model_parameters
@staticmethod
def get_exclude_params_for_model_parameters() -> Set[str]:
return set(["messages", "prompt", "input"])
@staticmethod
def _get_relevant_args_to_use_for_logging() -> Set[str]:
"""
Gets all relevant llm api params besides the ones with prompt content
"""
all_openai_llm_api_params = ModelParamHelper._get_all_llm_api_params()
# Exclude parameters that contain prompt content
combined_kwargs = all_openai_llm_api_params.difference(
set(ModelParamHelper.get_exclude_params_for_model_parameters())
)
return combined_kwargs
@staticmethod
def _get_all_llm_api_params() -> Set[str]:
"""
Gets the supported kwargs for each call type and combines them
"""
chat_completion_kwargs = (
ModelParamHelper._get_litellm_supported_chat_completion_kwargs()
)
text_completion_kwargs = (
ModelParamHelper._get_litellm_supported_text_completion_kwargs()
)
embedding_kwargs = ModelParamHelper._get_litellm_supported_embedding_kwargs()
transcription_kwargs = (
ModelParamHelper._get_litellm_supported_transcription_kwargs()
)
rerank_kwargs = ModelParamHelper._get_litellm_supported_rerank_kwargs()
exclude_kwargs = ModelParamHelper._get_exclude_kwargs()
combined_kwargs = chat_completion_kwargs.union(
text_completion_kwargs,
embedding_kwargs,
transcription_kwargs,
rerank_kwargs,
)
combined_kwargs = combined_kwargs.difference(exclude_kwargs)
return combined_kwargs
@staticmethod
def _get_litellm_supported_chat_completion_kwargs() -> Set[str]:
"""
Get the litellm supported chat completion kwargs
This follows the OpenAI API Spec
"""
all_chat_completion_kwargs = set(
CompletionCreateParamsNonStreaming.__annotations__.keys()
).union(set(CompletionCreateParamsStreaming.__annotations__.keys()))
return all_chat_completion_kwargs
@staticmethod
def _get_litellm_supported_text_completion_kwargs() -> Set[str]:
"""
Get the litellm supported text completion kwargs
This follows the OpenAI API Spec
"""
all_text_completion_kwargs = set(
TextCompletionCreateParamsNonStreaming.__annotations__.keys()
).union(set(TextCompletionCreateParamsStreaming.__annotations__.keys()))
return all_text_completion_kwargs
@staticmethod
def _get_litellm_supported_rerank_kwargs() -> Set[str]:
"""
Get the litellm supported rerank kwargs
"""
return set(RerankRequest.model_fields.keys())
@staticmethod
def _get_litellm_supported_embedding_kwargs() -> Set[str]:
"""
Get the litellm supported embedding kwargs
This follows the OpenAI API Spec
"""
return set(EmbeddingCreateParams.__annotations__.keys())
@staticmethod
def _get_litellm_supported_transcription_kwargs() -> Set[str]:
"""
Get the litellm supported transcription kwargs
This follows the OpenAI API Spec
"""
return set(TranscriptionCreateParams.__annotations__.keys())
@staticmethod
def _get_exclude_kwargs() -> Set[str]:
"""
Get the kwargs to exclude from the logged model parameters
"""
return set(["metadata"])

View file

@ -2151,6 +2151,10 @@ from email.message import Message
import httpx
from litellm.types.llms.bedrock import (
BedrockConverseReasoningContentBlock,
BedrockConverseReasoningTextBlock,
)
from litellm.types.llms.bedrock import ContentBlock as BedrockContentBlock
from litellm.types.llms.bedrock import DocumentBlock as BedrockDocumentBlock
from litellm.types.llms.bedrock import ImageBlock as BedrockImageBlock
@ -2963,6 +2967,28 @@ class BedrockConverseMessagesProcessor:
return contents
@staticmethod
def translate_thinking_blocks_to_reasoning_content_blocks(
thinking_blocks: List[ChatCompletionThinkingBlock],
) -> List[BedrockContentBlock]:
reasoning_content_blocks: List[BedrockContentBlock] = []
for thinking_block in thinking_blocks:
reasoning_text = thinking_block.get("thinking")
reasoning_signature = thinking_block.get("signature_delta")
text_block = BedrockConverseReasoningTextBlock(
text=reasoning_text or "",
)
if reasoning_signature is not None:
text_block["signature"] = reasoning_signature
reasoning_content_block = BedrockConverseReasoningContentBlock(
reasoningText=text_block,
)
bedrock_content_block = BedrockContentBlock(
reasoningContent=reasoning_content_block
)
reasoning_content_blocks.append(bedrock_content_block)
return reasoning_content_blocks
def _bedrock_converse_messages_pt( # noqa: PLR0915
messages: List,
@ -3109,11 +3135,23 @@ def _bedrock_converse_messages_pt( # noqa: PLR0915
assistant_content: List[BedrockContentBlock] = []
## MERGE CONSECUTIVE ASSISTANT CONTENT ##
while msg_i < len(messages) and messages[msg_i]["role"] == "assistant":
assistant_message_block = get_assistant_message_block_or_continue_message(
message=messages[msg_i],
assistant_continue_message=assistant_continue_message,
)
_assistant_content = assistant_message_block.get("content", None)
thinking_blocks = cast(
Optional[List[ChatCompletionThinkingBlock]],
assistant_message_block.get("thinking_blocks"),
)
if thinking_blocks is not None:
assistant_content.extend(
BedrockConverseMessagesProcessor.translate_thinking_blocks_to_reasoning_content_blocks(
thinking_blocks
)
)
if _assistant_content is not None and isinstance(_assistant_content, list):
assistants_parts: List[BedrockContentBlock] = []

View file

@ -5,7 +5,7 @@ import threading
import time
import traceback
import uuid
from typing import Any, Callable, Dict, List, Optional, cast
from typing import Any, Callable, Dict, List, Optional, Union, cast
import httpx
from pydantic import BaseModel
@ -14,6 +14,7 @@ import litellm
from litellm import verbose_logger
from litellm.litellm_core_utils.redact_messages import LiteLLMLoggingObject
from litellm.litellm_core_utils.thread_pool_executor import executor
from litellm.types.llms.openai import ChatCompletionChunk
from litellm.types.utils import Delta
from litellm.types.utils import GenericStreamingChunk as GChunk
from litellm.types.utils import (
@ -110,7 +111,7 @@ class CustomStreamWrapper:
) # GUARANTEE OPENAI HEADERS IN RESPONSE
self._response_headers = _response_headers
self.response_id = None
self.response_id: Optional[str] = None
self.logging_loop = None
self.rules = Rules()
self.stream_options = stream_options or getattr(
@ -713,7 +714,7 @@ class CustomStreamWrapper:
def is_delta_empty(self, delta: Delta) -> bool:
is_empty = True
if delta.content is not None:
if delta.content:
is_empty = False
elif delta.tool_calls is not None:
is_empty = False
@ -721,6 +722,39 @@ class CustomStreamWrapper:
is_empty = False
return is_empty
def set_model_id(
self, id: str, model_response: ModelResponseStream
) -> ModelResponseStream:
"""
Set the model id and response id to the given id.
Ensure model id is always the same across all chunks.
If the first chunk already set an id, use that id for all subsequent chunks.
"""
if self.response_id is None:
self.response_id = id
if self.response_id is not None and isinstance(self.response_id, str):
model_response.id = self.response_id
return model_response
def copy_model_response_level_provider_specific_fields(
self,
original_chunk: Union[ModelResponseStream, ChatCompletionChunk],
model_response: ModelResponseStream,
) -> ModelResponseStream:
"""
Copy provider_specific_fields from original_chunk to model_response.
"""
provider_specific_fields = getattr(
original_chunk, "provider_specific_fields", None
)
if provider_specific_fields is not None:
model_response.provider_specific_fields = provider_specific_fields
for k, v in provider_specific_fields.items():
setattr(model_response, k, v)
return model_response
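
A simplified sketch of the stable-id behaviour these two helpers add: the first chunk's id is cached and re-applied to every later chunk. The `Chunk` class is a stand-in for illustration, not litellm's `ModelResponseStream`:

```python
# Sketch: pin the response id of a stream to the id of its first chunk.
from dataclasses import dataclass
from typing import Optional

@dataclass
class Chunk:
    id: str

class StreamIdPinner:
    def __init__(self) -> None:
        self.response_id: Optional[str] = None

    def set_model_id(self, id: str, chunk: Chunk) -> Chunk:
        # Remember the first id seen, then overwrite every chunk with it.
        if self.response_id is None:
            self.response_id = id
        chunk.id = self.response_id
        return chunk

pinner = StreamIdPinner()
print(pinner.set_model_id("chatcmpl-1", Chunk(id="chatcmpl-1")).id)  # chatcmpl-1
print(pinner.set_model_id("chatcmpl-2", Chunk(id="chatcmpl-2")).id)  # chatcmpl-1
```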
def return_processed_chunk_logic( # noqa
self,
completion_obj: Dict[str, Any],
@ -747,6 +781,10 @@ class CustomStreamWrapper:
and completion_obj["function_call"] is not None
)
or (model_response.choices[0].delta.provider_specific_fields is not None)
or (
"provider_specific_fields" in model_response
and model_response.choices[0].delta.provider_specific_fields is not None
)
or (
"provider_specific_fields" in response_obj
and response_obj["provider_specific_fields"] is not None
@ -763,8 +801,6 @@ class CustomStreamWrapper:
## check if openai/azure chunk
original_chunk = response_obj.get("original_chunk", None)
if original_chunk:
model_response.id = original_chunk.id
self.response_id = original_chunk.id
if len(original_chunk.choices) > 0:
choices = []
for choice in original_chunk.choices:
@ -798,9 +834,10 @@ class CustomStreamWrapper:
model_response.choices[0].delta, "role"
):
_initial_delta = model_response.choices[0].delta.model_dump()
_initial_delta.pop("role", None)
model_response.choices[0].delta = Delta(**_initial_delta)
print_verbose(
verbose_logger.debug(
f"model_response.choices[0].delta: {model_response.choices[0].delta}"
)
else:
@ -842,6 +879,9 @@ class CustomStreamWrapper:
_is_delta_empty = self.is_delta_empty(delta=model_response.choices[0].delta)
if _is_delta_empty:
model_response.choices[0].delta = Delta(
content=None
) # ensure empty delta chunk returned
# get any function call arguments
model_response.choices[0].finish_reason = map_finish_reason(
finish_reason=self.received_finish_reason
@ -870,7 +910,7 @@ class CustomStreamWrapper:
self.chunks.append(model_response)
return
def chunk_creator(self, chunk): # type: ignore # noqa: PLR0915
def chunk_creator(self, chunk: Any): # type: ignore # noqa: PLR0915
model_response = self.model_response_creator()
response_obj: Dict[str, Any] = {}
@ -886,16 +926,13 @@ class CustomStreamWrapper:
) # check if chunk is a generic streaming chunk
) or (
self.custom_llm_provider
and (
self.custom_llm_provider == "anthropic"
or self.custom_llm_provider in litellm._custom_providers
)
and self.custom_llm_provider in litellm._custom_providers
):
if self.received_finish_reason is not None:
if "provider_specific_fields" not in chunk:
raise StopIteration
anthropic_response_obj: GChunk = chunk
anthropic_response_obj: GChunk = cast(GChunk, chunk)
completion_obj["content"] = anthropic_response_obj["text"]
if anthropic_response_obj["is_finished"]:
self.received_finish_reason = anthropic_response_obj[
@ -927,7 +964,7 @@ class CustomStreamWrapper:
].items():
setattr(model_response, key, value)
response_obj = anthropic_response_obj
response_obj = cast(Dict[str, Any], anthropic_response_obj)
elif self.model == "replicate" or self.custom_llm_provider == "replicate":
response_obj = self.handle_replicate_chunk(chunk)
completion_obj["content"] = response_obj["text"]
@ -989,6 +1026,7 @@ class CustomStreamWrapper:
try:
completion_obj["content"] = chunk.text
except Exception as e:
original_exception = e
if "Part has no text." in str(e):
## check for function calling
function_call = (
@ -1030,7 +1068,7 @@ class CustomStreamWrapper:
_model_response.choices = [_streaming_response]
response_obj = {"original_chunk": _model_response}
else:
raise e
raise original_exception
if (
hasattr(chunk.candidates[0], "finish_reason")
and chunk.candidates[0].finish_reason.name
@ -1093,8 +1131,9 @@ class CustomStreamWrapper:
total_tokens=response_obj["usage"].total_tokens,
)
elif self.custom_llm_provider == "text-completion-codestral":
response_obj = litellm.CodestralTextCompletionConfig()._chunk_parser(
chunk
response_obj = cast(
Dict[str, Any],
litellm.CodestralTextCompletionConfig()._chunk_parser(chunk),
)
completion_obj["content"] = response_obj["text"]
print_verbose(f"completion obj content: {completion_obj['content']}")
@ -1156,8 +1195,9 @@ class CustomStreamWrapper:
self.received_finish_reason = response_obj["finish_reason"]
if response_obj.get("original_chunk", None) is not None:
if hasattr(response_obj["original_chunk"], "id"):
model_response.id = response_obj["original_chunk"].id
self.response_id = model_response.id
model_response = self.set_model_id(
response_obj["original_chunk"].id, model_response
)
if hasattr(response_obj["original_chunk"], "system_fingerprint"):
model_response.system_fingerprint = response_obj[
"original_chunk"
@ -1206,8 +1246,16 @@ class CustomStreamWrapper:
): # function / tool calling branch - only set for openai/azure compatible endpoints
# enter this branch when no content has been passed in response
original_chunk = response_obj.get("original_chunk", None)
model_response.id = original_chunk.id
self.response_id = original_chunk.id
if hasattr(original_chunk, "id"):
model_response = self.set_model_id(
original_chunk.id, model_response
)
if hasattr(original_chunk, "provider_specific_fields"):
model_response = (
self.copy_model_response_level_provider_specific_fields(
original_chunk, model_response
)
)
if original_chunk.choices and len(original_chunk.choices) > 0:
delta = original_chunk.choices[0].delta
if delta is not None and (

View file

@ -26,7 +26,7 @@ else:
class AiohttpOpenAIChatConfig(OpenAILikeChatConfig):
def get_complete_url(
self,
api_base: str,
api_base: Optional[str],
model: str,
optional_params: dict,
stream: Optional[bool] = None,
@ -35,6 +35,8 @@ class AiohttpOpenAIChatConfig(OpenAILikeChatConfig):
Ensure - /v1/chat/completions is at the end of the url
"""
if api_base is None:
api_base = "https://api.openai.com"
if not api_base.endswith("/chat/completions"):
api_base += "/chat/completions"

View file

@ -34,7 +34,12 @@ from litellm.types.llms.openai import (
ChatCompletionToolCallChunk,
ChatCompletionUsageBlock,
)
from litellm.types.utils import GenericStreamingChunk
from litellm.types.utils import (
Delta,
GenericStreamingChunk,
ModelResponseStream,
StreamingChoices,
)
from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager
from ...base import BaseLLM
@ -507,7 +512,12 @@ class ModelResponseIterator:
return usage_block
def _content_block_delta_helper(self, chunk: dict):
def _content_block_delta_helper(self, chunk: dict) -> Tuple[
str,
Optional[ChatCompletionToolCallChunk],
List[ChatCompletionThinkingBlock],
Dict[str, Any],
]:
"""
Helper function to handle the content block delta
"""
@ -516,6 +526,7 @@ class ModelResponseIterator:
tool_use: Optional[ChatCompletionToolCallChunk] = None
provider_specific_fields = {}
content_block = ContentBlockDelta(**chunk) # type: ignore
thinking_blocks: List[ChatCompletionThinkingBlock] = []
self.content_blocks.append(content_block)
if "text" in content_block["delta"]:
text = content_block["delta"]["text"]
@ -535,25 +546,41 @@ class ModelResponseIterator:
"thinking" in content_block["delta"]
or "signature_delta" == content_block["delta"]
):
provider_specific_fields["thinking_blocks"] = [
thinking_blocks = [
ChatCompletionThinkingBlock(
type="thinking",
thinking=content_block["delta"].get("thinking"),
signature_delta=content_block["delta"].get("signature"),
)
]
return text, tool_use, provider_specific_fields
provider_specific_fields["thinking_blocks"] = thinking_blocks
return text, tool_use, thinking_blocks, provider_specific_fields
def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
def _handle_reasoning_content(
self, thinking_blocks: List[ChatCompletionThinkingBlock]
) -> Optional[str]:
"""
Handle the reasoning content
"""
reasoning_content = None
for block in thinking_blocks:
if reasoning_content is None:
reasoning_content = ""
if "thinking" in block:
reasoning_content += block["thinking"]
return reasoning_content
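
A small self-contained sketch of the same idea: collapse a list of thinking blocks into a single `reasoning_content` string (plain dicts stand in for the typed blocks):

```python
# Sketch: join the "thinking" text of each block into one reasoning string.
from typing import List, Optional

def join_reasoning(thinking_blocks: List[dict]) -> Optional[str]:
    reasoning_content: Optional[str] = None
    for block in thinking_blocks:
        if reasoning_content is None:
            reasoning_content = ""
        if "thinking" in block:
            reasoning_content += block["thinking"]
    return reasoning_content

print(join_reasoning([{"type": "thinking", "thinking": "step 1 "},
                      {"type": "thinking", "thinking": "step 2"}]))
# -> "step 1 step 2"
```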
def chunk_parser(self, chunk: dict) -> ModelResponseStream:
try:
type_chunk = chunk.get("type", "") or ""
text = ""
tool_use: Optional[ChatCompletionToolCallChunk] = None
is_finished = False
finish_reason = ""
usage: Optional[ChatCompletionUsageBlock] = None
provider_specific_fields: Dict[str, Any] = {}
reasoning_content: Optional[str] = None
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
index = int(chunk.get("index", 0))
if type_chunk == "content_block_delta":
@ -561,9 +588,13 @@ class ModelResponseIterator:
Anthropic content chunk
chunk = {'type': 'content_block_delta', 'index': 0, 'delta': {'type': 'text_delta', 'text': 'Hello'}}
"""
text, tool_use, provider_specific_fields = (
text, tool_use, thinking_blocks, provider_specific_fields = (
self._content_block_delta_helper(chunk=chunk)
)
if thinking_blocks:
reasoning_content = self._handle_reasoning_content(
thinking_blocks=thinking_blocks
)
elif type_chunk == "content_block_start":
"""
event: content_block_start
@ -610,7 +641,6 @@ class ModelResponseIterator:
or "stop"
)
usage = self._handle_usage(anthropic_usage_chunk=message_delta["usage"])
is_finished = True
elif type_chunk == "message_start":
"""
Anthropic
@ -649,16 +679,27 @@ class ModelResponseIterator:
text, tool_use = self._handle_json_mode_chunk(text=text, tool_use=tool_use)
returned_chunk = GenericStreamingChunk(
text=text,
tool_use=tool_use,
is_finished=is_finished,
finish_reason=finish_reason,
usage=usage,
returned_chunk = ModelResponseStream(
choices=[
StreamingChoices(
index=index,
delta=Delta(
content=text,
tool_calls=[tool_use] if tool_use is not None else None,
provider_specific_fields=(
provider_specific_fields if provider_specific_fields else None
provider_specific_fields
if provider_specific_fields
else None
),
thinking_blocks=(
thinking_blocks if thinking_blocks else None
),
reasoning_content=reasoning_content,
),
finish_reason=finish_reason,
)
],
usage=usage,
)
return returned_chunk
@ -769,7 +810,7 @@ class ModelResponseIterator:
except ValueError as e:
raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}")
def convert_str_chunk_to_generic_chunk(self, chunk: str) -> GenericStreamingChunk:
def convert_str_chunk_to_generic_chunk(self, chunk: str) -> ModelResponseStream:
"""
Convert a string chunk to a ModelResponseStream
@ -789,11 +830,4 @@ class ModelResponseIterator:
data_json = json.loads(str_line[5:])
return self.chunk_parser(chunk=data_json)
else:
return GenericStreamingChunk(
text="",
is_finished=False,
finish_reason="",
usage=None,
index=0,
tool_use=None,
)
return ModelResponseStream()

View file

@ -23,6 +23,7 @@ from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionCachedContent,
ChatCompletionSystemMessage,
ChatCompletionThinkingBlock,
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
ChatCompletionToolParam,
@ -80,7 +81,7 @@ class AnthropicConfig(BaseConfig):
return super().get_config()
def get_supported_openai_params(self, model: str):
return [
params = [
"stream",
"stop",
"temperature",
@ -95,6 +96,11 @@ class AnthropicConfig(BaseConfig):
"user",
]
if "claude-3-7-sonnet" in model:
params.append("thinking")
return params
def get_json_schema_from_pydantic_object(
self, response_format: Union[Any, Dict, None]
) -> Optional[dict]:
@ -117,6 +123,7 @@ class AnthropicConfig(BaseConfig):
prompt_caching_set: bool = False,
pdf_used: bool = False,
is_vertex_request: bool = False,
user_anthropic_beta_headers: Optional[List[str]] = None,
) -> dict:
betas = []
@ -133,6 +140,9 @@ class AnthropicConfig(BaseConfig):
"content-type": "application/json",
}
if user_anthropic_beta_headers is not None:
betas.extend(user_anthropic_beta_headers)
# Don't send any beta headers to Vertex, Vertex has failed requests when they are sent
if is_vertex_request is True:
pass
@ -283,18 +293,6 @@ class AnthropicConfig(BaseConfig):
new_stop = new_v
return new_stop
def _add_tools_to_optional_params(
self, optional_params: dict, tools: List[AllAnthropicToolsValues]
) -> dict:
if "tools" not in optional_params:
optional_params["tools"] = tools
else:
optional_params["tools"] = [
*optional_params["tools"],
*tools,
]
return optional_params
def map_openai_params(
self,
non_default_params: dict,
@ -335,6 +333,10 @@ class AnthropicConfig(BaseConfig):
optional_params["top_p"] = value
if param == "response_format" and isinstance(value, dict):
ignore_response_format_types = ["text"]
if value["type"] in ignore_response_format_types: # value is a no-op
continue
json_schema: Optional[dict] = None
if "response_schema" in value:
json_schema = value["response_schema"]
@ -358,7 +360,8 @@ class AnthropicConfig(BaseConfig):
optional_params["json_mode"] = True
if param == "user":
optional_params["metadata"] = {"user_id": value}
if param == "thinking":
optional_params["thinking"] = value
return optional_params
def _create_json_tool_call_for_response_format(
@ -584,12 +587,14 @@ class AnthropicConfig(BaseConfig):
def extract_response_content(self, completion_response: dict) -> Tuple[
str,
Optional[List[Any]],
Optional[List[Dict[str, Any]]],
Optional[List[ChatCompletionThinkingBlock]],
Optional[str],
List[ChatCompletionToolCallChunk],
]:
text_content = ""
citations: Optional[List[Any]] = None
thinking_blocks: Optional[List[Dict[str, Any]]] = None
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
reasoning_content: Optional[str] = None
tool_calls: List[ChatCompletionToolCallChunk] = []
for idx, content in enumerate(completion_response["content"]):
if content["type"] == "text":
@ -615,8 +620,13 @@ class AnthropicConfig(BaseConfig):
if content.get("thinking", None) is not None:
if thinking_blocks is None:
thinking_blocks = []
thinking_blocks.append(content)
return text_content, citations, thinking_blocks, tool_calls
thinking_blocks.append(cast(ChatCompletionThinkingBlock, content))
if thinking_blocks is not None:
reasoning_content = ""
for block in thinking_blocks:
if "thinking" in block:
reasoning_content += block["thinking"]
return text_content, citations, thinking_blocks, reasoning_content, tool_calls
def transform_response(
self,
@ -666,10 +676,11 @@ class AnthropicConfig(BaseConfig):
else:
text_content = ""
citations: Optional[List[Any]] = None
thinking_blocks: Optional[List[Dict[str, Any]]] = None
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
reasoning_content: Optional[str] = None
tool_calls: List[ChatCompletionToolCallChunk] = []
text_content, citations, thinking_blocks, tool_calls = (
text_content, citations, thinking_blocks, reasoning_content, tool_calls = (
self.extract_response_content(completion_response=completion_response)
)
@ -680,6 +691,8 @@ class AnthropicConfig(BaseConfig):
"citations": citations,
"thinking_blocks": thinking_blocks,
},
thinking_blocks=thinking_blocks,
reasoning_content=reasoning_content,
)
## HANDLE JSON MODE - anthropic returns single function call
@ -774,6 +787,13 @@ class AnthropicConfig(BaseConfig):
headers=cast(httpx.Headers, headers),
)
def _get_user_anthropic_beta_headers(
self, anthropic_beta_header: Optional[str]
) -> Optional[List[str]]:
if anthropic_beta_header is None:
return None
return anthropic_beta_header.split(",")
def validate_environment(
self,
headers: dict,
@ -794,13 +814,18 @@ class AnthropicConfig(BaseConfig):
prompt_caching_set = self.is_cache_control_set(messages=messages)
computer_tool_used = self.is_computer_tool_used(tools=tools)
pdf_used = self.is_pdf_used(messages=messages)
user_anthropic_beta_headers = self._get_user_anthropic_beta_headers(
anthropic_beta_header=headers.get("anthropic-beta")
)
anthropic_headers = self.get_anthropic_headers(
computer_tool_used=computer_tool_used,
prompt_caching_set=prompt_caching_set,
pdf_used=pdf_used,
api_key=api_key,
is_vertex_request=optional_params.get("is_vertex_request", False),
user_anthropic_beta_headers=user_anthropic_beta_headers,
)
headers = {**headers, **anthropic_headers}
return headers

View file

@ -1,4 +1,5 @@
from typing import Any, List, Optional, Tuple, cast
from urllib.parse import urlparse
import httpx
from httpx import Response
@ -28,16 +29,29 @@ class AzureAIStudioConfig(OpenAIConfig):
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> dict:
if api_base and "services.ai.azure.com" in api_base:
if api_base and self._should_use_api_key_header(api_base):
headers["api-key"] = api_key
else:
headers["Authorization"] = f"Bearer {api_key}"
return headers
def _should_use_api_key_header(self, api_base: str) -> bool:
"""
Returns True if the request should use `api-key` header for authentication.
"""
parsed_url = urlparse(api_base)
host = parsed_url.hostname
if host and (
host.endswith(".services.ai.azure.com")
or host.endswith(".openai.azure.com")
):
return True
return False
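
A standalone sketch of the hostname-suffix check, using only the standard library; the URLs below are examples:

```python
# Sketch: decide between `api-key` and `Authorization: Bearer` headers
# based on the host suffix of the configured api_base.
from urllib.parse import urlparse

def should_use_api_key_header(api_base: str) -> bool:
    host = urlparse(api_base).hostname
    return bool(
        host
        and (host.endswith(".services.ai.azure.com") or host.endswith(".openai.azure.com"))
    )

print(should_use_api_key_header("https://litellm8397336933.services.ai.azure.com/models"))  # True
print(should_use_api_key_header("https://example.openai.azure.com/openai/deployments/gpt"))  # True
print(should_use_api_key_header("https://api.example.com/v1"))                               # False
```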
def get_complete_url(
self,
api_base: str,
api_base: Optional[str],
model: str,
optional_params: dict,
stream: Optional[bool] = None,
@ -58,6 +72,10 @@ class AzureAIStudioConfig(OpenAIConfig):
- A complete URL string, e.g.,
"https://litellm8397336933.services.ai.azure.com/models/chat/completions?api-version=2024-05-01-preview"
"""
if api_base is None:
raise ValueError(
f"api_base is required for Azure AI Studio. Please set the api_base parameter. Passed `api_base={api_base}`"
)
original_url = httpx.URL(api_base)
# Extract api_version or use default

View file

@ -111,6 +111,19 @@ class BaseConfig(ABC):
"""
return False
def _add_tools_to_optional_params(self, optional_params: dict, tools: List) -> dict:
"""
Helper util to add tools to optional_params.
"""
if "tools" not in optional_params:
optional_params["tools"] = tools
else:
optional_params["tools"] = [
*optional_params["tools"],
*tools,
]
return optional_params
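
A quick sketch of the merge behaviour this helper standardizes: new tools are appended to any tools already present rather than replacing them. The tool names here are made up for illustration:

```python
# Sketch: append tools instead of overwriting an existing "tools" list.
from typing import List

def add_tools_to_optional_params(optional_params: dict, tools: List) -> dict:
    if "tools" not in optional_params:
        optional_params["tools"] = tools
    else:
        optional_params["tools"] = [*optional_params["tools"], *tools]
    return optional_params

params = {"tools": [{"type": "function", "function": {"name": "existing_tool"}}]}
add_tools_to_optional_params(params, [{"type": "function", "function": {"name": "json_tool_call"}}])
print([t["function"]["name"] for t in params["tools"]])  # ['existing_tool', 'json_tool_call']
```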
def translate_developer_role_to_system_role(
self,
messages: List[AllMessageValues],
@ -158,6 +171,7 @@ class BaseConfig(ABC):
optional_params: dict,
value: dict,
is_response_format_supported: bool,
enforce_tool_choice: bool = True,
) -> dict:
"""
Follow similar approach to anthropic - translate to a single tool call.
@ -195,9 +209,11 @@ class BaseConfig(ABC):
optional_params.setdefault("tools", [])
optional_params["tools"].append(_tool)
if enforce_tool_choice:
optional_params["tool_choice"] = _tool_choice
optional_params["json_mode"] = True
else:
elif is_response_format_supported:
optional_params["response_format"] = value
return optional_params
@ -249,7 +265,7 @@ class BaseConfig(ABC):
def get_complete_url(
self,
api_base: str,
api_base: Optional[str],
model: str,
optional_params: dict,
stream: Optional[bool] = None,
@ -261,6 +277,8 @@ class BaseConfig(ABC):
Some providers need `model` in `api_base`
"""
if api_base is None:
raise ValueError("api_base is required")
return api_base
@abstractmethod
@ -315,6 +333,7 @@ class BaseConfig(ABC):
data: dict,
messages: list,
client: Optional[AsyncHTTPHandler] = None,
json_mode: Optional[bool] = None,
) -> CustomStreamWrapper:
raise NotImplementedError
@ -328,6 +347,7 @@ class BaseConfig(ABC):
data: dict,
messages: list,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
json_mode: Optional[bool] = None,
) -> CustomStreamWrapper:
raise NotImplementedError

View file

@ -2,13 +2,14 @@ import hashlib
import json
import os
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast, get_args
import httpx
from pydantic import BaseModel
from litellm._logging import verbose_logger
from litellm.caching.caching import DualCache
from litellm.constants import BEDROCK_INVOKE_PROVIDERS_LITERAL
from litellm.litellm_core_utils.dd_tracing import tracer
from litellm.secret_managers.main import get_secret
@ -223,6 +224,60 @@ class BaseAWSLLM:
# Catch any unexpected errors and return None
return None
@staticmethod
def _get_provider_from_model_path(
model_path: str,
) -> Optional[BEDROCK_INVOKE_PROVIDERS_LITERAL]:
"""
Helper function to get the provider from a model path with format: provider/model-name
Args:
model_path (str): The model path (e.g., 'llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n' or 'anthropic/model-name')
Returns:
Optional[str]: The provider name, or None if no valid provider found
"""
parts = model_path.split("/")
if len(parts) >= 1:
provider = parts[0]
if provider in get_args(BEDROCK_INVOKE_PROVIDERS_LITERAL):
return cast(BEDROCK_INVOKE_PROVIDERS_LITERAL, provider)
return None
@staticmethod
def get_bedrock_invoke_provider(
model: str,
) -> Optional[BEDROCK_INVOKE_PROVIDERS_LITERAL]:
"""
Helper function to get the bedrock provider from the model
handles 4 scenarios:
1. model=invoke/anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic`
2. model=anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic`
3. model=llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n -> Returns `llama`
4. model=us.amazon.nova-pro-v1:0 -> Returns `nova`
"""
if model.startswith("invoke/"):
model = model.replace("invoke/", "", 1)
_split_model = model.split(".")[0]
if _split_model in get_args(BEDROCK_INVOKE_PROVIDERS_LITERAL):
return cast(BEDROCK_INVOKE_PROVIDERS_LITERAL, _split_model)
# If not a known provider, check for pattern with two slashes
provider = BaseAWSLLM._get_provider_from_model_path(model)
if provider is not None:
return provider
# check if provider == "nova"
if "nova" in model:
return "nova"
else:
for provider in get_args(BEDROCK_INVOKE_PROVIDERS_LITERAL):
if provider in model:
return provider
return None
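
A reduced sketch of the detection order used above: strip `invoke/`, try the `provider.model` prefix, then a `provider/...` path, then substring checks. The `Literal` here is a small example set, not the full `BEDROCK_INVOKE_PROVIDERS_LITERAL`:

```python
# Sketch of Bedrock invoke-provider detection over a reduced provider set.
from typing import Literal, Optional, cast, get_args

PROVIDERS = Literal["anthropic", "llama", "nova", "deepseek_r1"]

def get_bedrock_invoke_provider(model: str) -> Optional[PROVIDERS]:
    if model.startswith("invoke/"):
        model = model.replace("invoke/", "", 1)
    head = model.split(".")[0]
    if head in get_args(PROVIDERS):
        return cast(PROVIDERS, head)
    path_head = model.split("/")[0]
    if path_head in get_args(PROVIDERS):
        return cast(PROVIDERS, path_head)
    if "nova" in model:
        return "nova"
    for provider in get_args(PROVIDERS):
        if provider in model:
            return provider
    return None

print(get_bedrock_invoke_provider("invoke/anthropic.claude-3-5-sonnet-20240620-v1:0"))        # anthropic
print(get_bedrock_invoke_provider("us.amazon.nova-pro-v1:0"))                                 # nova
print(get_bedrock_invoke_provider("llama/arn:aws:bedrock:us-east-1:0867:imported-model/abc"))  # llama
```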
def _get_aws_region_name(
self, optional_params: dict, model: Optional[str] = None
) -> str:

View file

@ -23,6 +23,7 @@ from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionResponseMessage,
ChatCompletionSystemMessage,
ChatCompletionThinkingBlock,
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
ChatCompletionToolParam,
@ -116,6 +117,10 @@ class AmazonConverseConfig(BaseConfig):
# only anthropic and mistral support tool choice config. otherwise (E.g. cohere) will fail the call - https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ToolChoice.html
supported_params.append("tool_choice")
if (
"claude-3-7" in model
): # [TODO]: move to a 'supports_reasoning_content' param from model cost map
supported_params.append("thinking")
return supported_params
def map_tool_choice_values(
@ -162,6 +167,7 @@ class AmazonConverseConfig(BaseConfig):
self,
json_schema: Optional[dict] = None,
schema_name: str = "json_tool_call",
description: Optional[str] = None,
) -> ChatCompletionToolParam:
"""
Handles creating a tool call for getting responses in JSON format.
@ -184,11 +190,15 @@ class AmazonConverseConfig(BaseConfig):
else:
_input_schema = json_schema
tool_param_function_chunk = ChatCompletionToolParamFunctionChunk(
name=schema_name, parameters=_input_schema
)
if description:
tool_param_function_chunk["description"] = description
_tool = ChatCompletionToolParam(
type="function",
function=ChatCompletionToolParamFunctionChunk(
name=schema_name, parameters=_input_schema
),
function=tool_param_function_chunk,
)
return _tool
@ -201,15 +211,26 @@ class AmazonConverseConfig(BaseConfig):
messages: Optional[List[AllMessageValues]] = None,
) -> dict:
for param, value in non_default_params.items():
if param == "response_format":
if param == "response_format" and isinstance(value, dict):
ignore_response_format_types = ["text"]
if value["type"] in ignore_response_format_types: # value is a no-op
continue
json_schema: Optional[dict] = None
schema_name: str = ""
description: Optional[str] = None
if "response_schema" in value:
json_schema = value["response_schema"]
schema_name = "json_tool_call"
elif "json_schema" in value:
json_schema = value["json_schema"]["schema"]
schema_name = value["json_schema"]["name"]
description = value["json_schema"].get("description")
if "type" in value and value["type"] == "text":
continue
"""
Follow similar approach to anthropic - translate to a single tool call.
@ -218,12 +239,14 @@ class AmazonConverseConfig(BaseConfig):
- You should set tool_choice (see Forcing tool use) to instruct the model to explicitly use that tool
- Remember that the model will pass the input to the tool, so the name of the tool and description should be from the model's perspective.
"""
_tool_choice = {"name": schema_name, "type": "tool"}
_tool = self._create_json_tool_call_for_response_format(
json_schema=json_schema,
schema_name=schema_name if schema_name != "" else "json_tool_call",
description=description,
)
optional_params = self._add_tools_to_optional_params(
optional_params=optional_params, tools=[_tool]
)
optional_params["tools"] = [_tool]
if litellm.utils.supports_tool_choice(
model=model, custom_llm_provider=self.custom_llm_provider
):
@ -250,14 +273,17 @@ class AmazonConverseConfig(BaseConfig):
if param == "top_p":
optional_params["topP"] = value
if param == "tools":
optional_params["tools"] = value
optional_params = self._add_tools_to_optional_params(
optional_params=optional_params, tools=value
)
if param == "tool_choice":
_tool_choice_value = self.map_tool_choice_values(
model=model, tool_choice=value, drop_params=drop_params # type: ignore
)
if _tool_choice_value is not None:
optional_params["tool_choice"] = _tool_choice_value
if param == "thinking":
optional_params["thinking"] = value
return optional_params
@overload
@ -545,6 +571,37 @@ class AmazonConverseConfig(BaseConfig):
encoding=encoding,
)
def _transform_reasoning_content(
self, reasoning_content_blocks: List[BedrockConverseReasoningContentBlock]
) -> str:
"""
Extract the reasoning text from the reasoning content blocks
Ensures the output is compatible with the deepseek reasoning_content format.
"""
reasoning_content_str = ""
for block in reasoning_content_blocks:
if "reasoningText" in block:
reasoning_content_str += block["reasoningText"]["text"]
return reasoning_content_str
def _transform_thinking_blocks(
self, thinking_blocks: List[BedrockConverseReasoningContentBlock]
) -> List[ChatCompletionThinkingBlock]:
"""Return a consistent format for thinking blocks between Anthropic and Bedrock."""
thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
for block in thinking_blocks:
if "reasoningText" in block:
_thinking_block = ChatCompletionThinkingBlock(type="thinking")
_text = block["reasoningText"].get("text")
_signature = block["reasoningText"].get("signature")
if _text is not None:
_thinking_block["thinking"] = _text
if _signature is not None:
_thinking_block["signature_delta"] = _signature
thinking_blocks_list.append(_thinking_block)
return thinking_blocks_list
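
For context, a combined sketch of what these two transforms do: flatten Bedrock `reasoningText` blocks into (a) one reasoning string and (b) Anthropic-style thinking blocks. Plain dicts stand in for the typed blocks:

```python
# Sketch: flatten Bedrock reasoningText blocks into a reasoning string and thinking blocks.
from typing import Dict, List, Tuple

def transform_reasoning(blocks: List[Dict]) -> Tuple[str, List[Dict]]:
    reasoning_content = ""
    thinking_blocks: List[Dict] = []
    for block in blocks:
        if "reasoningText" not in block:
            continue
        text = block["reasoningText"].get("text")
        signature = block["reasoningText"].get("signature")
        if text is not None:
            reasoning_content += text
        thinking_block: Dict = {"type": "thinking"}
        if text is not None:
            thinking_block["thinking"] = text
        if signature is not None:
            thinking_block["signature_delta"] = signature
        thinking_blocks.append(thinking_block)
    return reasoning_content, thinking_blocks

print(transform_reasoning([{"reasoningText": {"text": "thought", "signature": "sig"}}]))
# -> ('thought', [{'type': 'thinking', 'thinking': 'thought', 'signature_delta': 'sig'}])
```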
def _transform_response(
self,
model: str,
@ -618,6 +675,10 @@ class AmazonConverseConfig(BaseConfig):
chat_completion_message: ChatCompletionResponseMessage = {"role": "assistant"}
content_str = ""
tools: List[ChatCompletionToolCallChunk] = []
reasoningContentBlocks: Optional[List[BedrockConverseReasoningContentBlock]] = (
None
)
if message is not None:
for idx, content in enumerate(message["content"]):
"""
@ -644,8 +705,22 @@ class AmazonConverseConfig(BaseConfig):
index=idx,
)
tools.append(_tool_response_chunk)
chat_completion_message["content"] = content_str
if "reasoningContent" in content:
if reasoningContentBlocks is None:
reasoningContentBlocks = []
reasoningContentBlocks.append(content["reasoningContent"])
if reasoningContentBlocks is not None:
chat_completion_message["provider_specific_fields"] = {
"reasoningContentBlocks": reasoningContentBlocks,
}
chat_completion_message["reasoning_content"] = (
self._transform_reasoning_content(reasoningContentBlocks)
)
chat_completion_message["thinking_blocks"] = (
self._transform_thinking_blocks(reasoningContentBlocks)
)
chat_completion_message["content"] = content_str
if json_mode is True and tools is not None and len(tools) == 1:
# to support 'json_schema' logic on bedrock models
json_mode_content_str: Optional[str] = tools[0]["function"].get("arguments")

View file

@ -26,7 +26,6 @@ import httpx # type: ignore
import litellm
from litellm import verbose_logger
from litellm._logging import print_verbose
from litellm.caching.caching import InMemoryCache
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.litellm_core_utils.litellm_logging import Logging
@ -51,13 +50,19 @@ from litellm.llms.custom_httpx.http_handler import (
)
from litellm.types.llms.bedrock import *
from litellm.types.llms.openai import (
ChatCompletionThinkingBlock,
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
ChatCompletionUsageBlock,
)
from litellm.types.utils import ChatCompletionMessageToolCall, Choices
from litellm.types.utils import ChatCompletionMessageToolCall, Choices, Delta
from litellm.types.utils import GenericStreamingChunk as GChunk
from litellm.types.utils import ModelResponse, ModelResponseStream, Usage
from litellm.types.utils import (
ModelResponse,
ModelResponseStream,
StreamingChoices,
Usage,
)
from litellm.utils import CustomStreamWrapper, get_secret
from ..base_aws_llm import BaseAWSLLM
@ -212,7 +217,6 @@ async def make_call(
api_key="",
data=data,
messages=messages,
print_verbose=print_verbose,
encoding=litellm.encoding,
) # type: ignore
completion_stream: Any = MockResponseIterator(
@ -222,6 +226,7 @@ async def make_call(
decoder: AWSEventStreamDecoder = AmazonAnthropicClaudeStreamDecoder(
model=model,
sync_stream=False,
json_mode=json_mode,
)
completion_stream = decoder.aiter_bytes(
response.aiter_bytes(chunk_size=1024)
@ -298,7 +303,6 @@ def make_sync_call(
api_key="",
data=data,
messages=messages,
print_verbose=print_verbose,
encoding=litellm.encoding,
) # type: ignore
completion_stream: Any = MockResponseIterator(
@ -308,6 +312,7 @@ def make_sync_call(
decoder: AWSEventStreamDecoder = AmazonAnthropicClaudeStreamDecoder(
model=model,
sync_stream=True,
json_mode=json_mode,
)
completion_stream = decoder.iter_bytes(response.iter_bytes(chunk_size=1024))
elif bedrock_invoke_provider == "deepseek_r1":
@ -525,7 +530,7 @@ class BedrockLLM(BaseAWSLLM):
].message.tool_calls:
_tool_call = {**tool_call.dict(), "index": 0}
_tool_calls.append(_tool_call)
delta_obj = litellm.utils.Delta(
delta_obj = Delta(
content=getattr(
model_response.choices[0].message, "content", None
),
@ -1146,27 +1151,6 @@ class BedrockLLM(BaseAWSLLM):
)
return streaming_response
@staticmethod
def get_bedrock_invoke_provider(
model: str,
) -> Optional[litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL]:
"""
Helper function to get the bedrock provider from the model
handles 2 scenarios:
1. model=anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic`
2. model=llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n -> Returns `llama`
"""
_split_model = model.split(".")[0]
if _split_model in get_args(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL):
return cast(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL, _split_model)
# If not a known provider, check for pattern with two slashes
provider = BedrockLLM._get_provider_from_model_path(model)
if provider is not None:
return provider
return None
@staticmethod
def _get_provider_from_model_path(
model_path: str,
@ -1258,14 +1242,37 @@ class AWSEventStreamDecoder:
return True
return False
def converse_chunk_parser(self, chunk_data: dict) -> GChunk:
def extract_reasoning_content_str(
self, reasoning_content_block: BedrockConverseReasoningContentBlockDelta
) -> Optional[str]:
if "text" in reasoning_content_block:
return reasoning_content_block["text"]
return None
def translate_thinking_blocks(
self, thinking_block: BedrockConverseReasoningContentBlockDelta
) -> Optional[List[ChatCompletionThinkingBlock]]:
"""
Translate a Bedrock reasoning content delta into a list of ChatCompletionThinkingBlock entries
"""
thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
_thinking_block = ChatCompletionThinkingBlock(type="thinking")
if "text" in thinking_block:
_thinking_block["thinking"] = thinking_block["text"]
thinking_blocks_list.append(_thinking_block)
return thinking_blocks_list
def converse_chunk_parser(self, chunk_data: dict) -> ModelResponseStream:
try:
verbose_logger.debug("\n\nRaw Chunk: {}\n\n".format(chunk_data))
text = ""
tool_use: Optional[ChatCompletionToolCallChunk] = None
is_finished = False
finish_reason = ""
usage: Optional[ChatCompletionUsageBlock] = None
provider_specific_fields: dict = {}
reasoning_content: Optional[str] = None
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
index = int(chunk_data.get("contentBlockIndex", 0))
if "start" in chunk_data:
@ -1305,6 +1312,16 @@ class AWSEventStreamDecoder:
},
"index": index,
}
elif "reasoningContent" in delta_obj:
provider_specific_fields = {
"reasoningContent": delta_obj["reasoningContent"],
}
reasoning_content = self.extract_reasoning_content_str(
delta_obj["reasoningContent"]
)
thinking_blocks = self.translate_thinking_blocks(
delta_obj["reasoningContent"]
)
elif (
"contentBlockIndex" in chunk_data
): # stop block, no 'start' or 'delta' object
@ -1321,7 +1338,6 @@ class AWSEventStreamDecoder:
}
elif "stopReason" in chunk_data:
finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop"))
is_finished = True
elif "usage" in chunk_data:
usage = ChatCompletionUsageBlock(
prompt_tokens=chunk_data.get("inputTokens", 0),
@ -1329,18 +1345,33 @@ class AWSEventStreamDecoder:
total_tokens=chunk_data.get("totalTokens", 0),
)
response = GChunk(
text=text,
tool_use=tool_use,
is_finished=is_finished,
finish_reason=finish_reason,
usage=usage,
index=index,
)
model_response_provider_specific_fields = {}
if "trace" in chunk_data:
trace = chunk_data.get("trace")
response["provider_specific_fields"] = {"trace": trace}
model_response_provider_specific_fields["trace"] = trace
response = ModelResponseStream(
choices=[
StreamingChoices(
finish_reason=finish_reason,
index=index,
delta=Delta(
content=text,
role="assistant",
tool_calls=[tool_use] if tool_use else None,
provider_specific_fields=(
provider_specific_fields
if provider_specific_fields
else None
),
thinking_blocks=thinking_blocks,
reasoning_content=reasoning_content,
),
)
],
usage=usage,
provider_specific_fields=model_response_provider_specific_fields,
)
return response
except Exception as e:
raise Exception("Received streaming error - {}".format(str(e)))
@ -1474,6 +1505,7 @@ class AmazonAnthropicClaudeStreamDecoder(AWSEventStreamDecoder):
self,
model: str,
sync_stream: bool,
json_mode: Optional[bool] = None,
) -> None:
"""
Child class of AWSEventStreamDecoder that handles the streaming response from the Anthropic family of models
@ -1484,9 +1516,10 @@ class AmazonAnthropicClaudeStreamDecoder(AWSEventStreamDecoder):
self.anthropic_model_response_iterator = AnthropicModelResponseIterator(
streaming_response=None,
sync_stream=sync_stream,
json_mode=json_mode,
)
def _chunk_parser(self, chunk_data: dict) -> GChunk:
def _chunk_parser(self, chunk_data: dict) -> ModelResponseStream:
return self.anthropic_model_response_iterator.chunk_parser(chunk=chunk_data)

View file

@ -3,8 +3,10 @@ from typing import Optional
import litellm
from .base_invoke_transformation import AmazonInvokeConfig
class AmazonAnthropicConfig:
class AmazonAnthropicConfig(AmazonInvokeConfig):
"""
Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude
@ -57,9 +59,7 @@ class AmazonAnthropicConfig:
and v is not None
}
def get_supported_openai_params(
self,
):
def get_supported_openai_params(self, model: str):
return [
"max_tokens",
"max_completion_tokens",
@ -69,7 +69,13 @@ class AmazonAnthropicConfig:
"stream",
]
def map_openai_params(self, non_default_params: dict, optional_params: dict):
def map_openai_params(
self,
non_default_params: dict,
optional_params: dict,
model: str,
drop_params: bool,
):
for param, value in non_default_params.items():
if param == "max_tokens" or param == "max_completion_tokens":
optional_params["max_tokens_to_sample"] = value

View file

@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, Any, List, Optional
import httpx
import litellm
from litellm.llms.anthropic.chat.transformation import AnthropicConfig
from litellm.llms.bedrock.chat.invoke_transformations.base_invoke_transformation import (
AmazonInvokeConfig,
)
@ -17,7 +17,7 @@ else:
LiteLLMLoggingObj = Any
class AmazonAnthropicClaude3Config(AmazonInvokeConfig):
class AmazonAnthropicClaude3Config(AmazonInvokeConfig, AnthropicConfig):
"""
Reference:
https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude
@ -28,18 +28,8 @@ class AmazonAnthropicClaude3Config(AmazonInvokeConfig):
anthropic_version: str = "bedrock-2023-05-31"
def get_supported_openai_params(self, model: str):
return [
"max_tokens",
"max_completion_tokens",
"tools",
"tool_choice",
"stream",
"stop",
"temperature",
"top_p",
"extra_headers",
]
def get_supported_openai_params(self, model: str) -> List[str]:
return AnthropicConfig.get_supported_openai_params(self, model)
def map_openai_params(
self,
@ -47,21 +37,14 @@ class AmazonAnthropicClaude3Config(AmazonInvokeConfig):
optional_params: dict,
model: str,
drop_params: bool,
):
for param, value in non_default_params.items():
if param == "max_tokens" or param == "max_completion_tokens":
optional_params["max_tokens"] = value
if param == "tools":
optional_params["tools"] = value
if param == "stream":
optional_params["stream"] = value
if param == "stop":
optional_params["stop_sequences"] = value
if param == "temperature":
optional_params["temperature"] = value
if param == "top_p":
optional_params["top_p"] = value
return optional_params
) -> dict:
return AnthropicConfig.map_openai_params(
self,
non_default_params,
optional_params,
model,
drop_params,
)
def transform_request(
self,
@ -71,7 +54,8 @@ class AmazonAnthropicClaude3Config(AmazonInvokeConfig):
litellm_params: dict,
headers: dict,
) -> dict:
_anthropic_request = litellm.AnthropicConfig().transform_request(
_anthropic_request = AnthropicConfig.transform_request(
self,
model=model,
messages=messages,
optional_params=optional_params,
@ -80,6 +64,7 @@ class AmazonAnthropicClaude3Config(AmazonInvokeConfig):
)
_anthropic_request.pop("model", None)
_anthropic_request.pop("stream", None)
if "anthropic_version" not in _anthropic_request:
_anthropic_request["anthropic_version"] = self.anthropic_version
@ -99,7 +84,8 @@ class AmazonAnthropicClaude3Config(AmazonInvokeConfig):
api_key: Optional[str] = None,
json_mode: Optional[bool] = None,
) -> ModelResponse:
return litellm.AnthropicConfig().transform_response(
return AnthropicConfig.transform_response(
self,
model=model,
raw_response=raw_response,
model_response=model_response,

View file

@ -73,7 +73,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
def get_complete_url(
self,
api_base: str,
api_base: Optional[str],
model: str,
optional_params: dict,
stream: Optional[bool] = None,
@ -461,6 +461,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
data: dict,
messages: list,
client: Optional[AsyncHTTPHandler] = None,
json_mode: Optional[bool] = None,
) -> CustomStreamWrapper:
streaming_response = CustomStreamWrapper(
completion_stream=None,
@ -475,6 +476,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
logging_obj=logging_obj,
fake_stream=True if "ai21" in api_base else False,
bedrock_invoke_provider=self.get_bedrock_invoke_provider(model),
json_mode=json_mode,
),
model=model,
custom_llm_provider="bedrock",
@ -493,6 +495,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
data: dict,
messages: list,
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
json_mode: Optional[bool] = None,
) -> CustomStreamWrapper:
if client is None or isinstance(client, AsyncHTTPHandler):
client = _get_httpx_client(params={})
@ -509,6 +512,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
logging_obj=logging_obj,
fake_stream=True if "ai21" in api_base else False,
bedrock_invoke_provider=self.get_bedrock_invoke_provider(model),
json_mode=json_mode,
),
model=model,
custom_llm_provider="bedrock",
@ -534,7 +538,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
"""
Helper function to get the bedrock provider from the model
handles 3 scenarions:
handles 4 scenarios:
1. model=invoke/anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic`
2. model=anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic`
3. model=llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n -> Returns `llama`
@ -555,6 +559,10 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
# check if provider == "nova"
if "nova" in model:
return "nova"
for provider in get_args(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL):
if provider in model:
return provider
return None
@staticmethod

View file

@ -11,6 +11,7 @@ from litellm.llms.base_llm.chat.transformation import (
BaseLLMException,
LiteLLMLoggingObj,
)
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import (
ChatCompletionToolCallChunk,
@ -75,11 +76,16 @@ class CloudflareChatConfig(BaseConfig):
def get_complete_url(
self,
api_base: str,
api_base: Optional[str],
model: str,
optional_params: dict,
stream: Optional[bool] = None,
) -> str:
if api_base is None:
account_id = get_secret_str("CLOUDFLARE_ACCOUNT_ID")
api_base = (
f"https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/"
)
return api_base + model
def get_supported_openai_params(self, model: str) -> List[str]:

View file

@ -159,6 +159,7 @@ class BaseLLMHTTPHandler:
encoding: Any,
api_key: Optional[str] = None,
client: Optional[AsyncHTTPHandler] = None,
json_mode: bool = False,
):
if client is None:
async_httpx_client = get_async_httpx_client(
@ -190,6 +191,7 @@ class BaseLLMHTTPHandler:
optional_params=optional_params,
litellm_params=litellm_params,
encoding=encoding,
json_mode=json_mode,
)
def completion(
@ -211,6 +213,7 @@ class BaseLLMHTTPHandler:
headers: Optional[dict] = {},
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
):
json_mode: bool = optional_params.pop("json_mode", False)
provider_config = ProviderConfigManager.get_provider_chat_config(
model=model, provider=litellm.LlmProviders(custom_llm_provider)
@ -286,6 +289,7 @@ class BaseLLMHTTPHandler:
else None
),
litellm_params=litellm_params,
json_mode=json_mode,
)
else:
@ -309,6 +313,7 @@ class BaseLLMHTTPHandler:
if client is not None and isinstance(client, AsyncHTTPHandler)
else None
),
json_mode=json_mode,
)
if stream is True:
@ -327,6 +332,7 @@ class BaseLLMHTTPHandler:
data=data,
messages=messages,
client=client,
json_mode=json_mode,
)
completion_stream, headers = self.make_sync_call(
provider_config=provider_config,
@ -380,6 +386,7 @@ class BaseLLMHTTPHandler:
optional_params=optional_params,
litellm_params=litellm_params,
encoding=encoding,
json_mode=json_mode,
)
def make_sync_call(
@ -453,6 +460,7 @@ class BaseLLMHTTPHandler:
litellm_params: dict,
fake_stream: bool = False,
client: Optional[AsyncHTTPHandler] = None,
json_mode: Optional[bool] = None,
):
if provider_config.has_custom_stream_wrapper is True:
return provider_config.get_async_custom_stream_wrapper(
@ -464,6 +472,7 @@ class BaseLLMHTTPHandler:
data=data,
messages=messages,
client=client,
json_mode=json_mode,
)
completion_stream, _response_headers = await self.make_async_call_stream_helper(

View file

@ -34,3 +34,21 @@ class DeepSeekChatConfig(OpenAIGPTConfig):
) # type: ignore
dynamic_api_key = api_key or get_secret_str("DEEPSEEK_API_KEY")
return api_base, dynamic_api_key
def get_complete_url(
self,
api_base: Optional[str],
model: str,
optional_params: dict,
stream: Optional[bool] = None,
) -> str:
"""
If api_base is not provided, use the default DeepSeek /chat/completions endpoint.
"""
if not api_base:
api_base = "https://api.deepseek.com/beta"
if not api_base.endswith("/chat/completions"):
api_base = f"{api_base}/chat/completions"
return api_base
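
A self-contained sketch of this URL-completion logic: fall back to the default base when none is configured, then make sure the path ends with `/chat/completions`:

```python
# Sketch: complete the DeepSeek chat completions URL from an optional api_base.
from typing import Optional

def deepseek_complete_url(api_base: Optional[str]) -> str:
    if not api_base:
        api_base = "https://api.deepseek.com/beta"
    if not api_base.endswith("/chat/completions"):
        api_base = f"{api_base}/chat/completions"
    return api_base

print(deepseek_complete_url(None))
# -> https://api.deepseek.com/beta/chat/completions
print(deepseek_complete_url("https://api.deepseek.com/beta/chat/completions"))
# -> https://api.deepseek.com/beta/chat/completions (unchanged)
```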

View file

@ -90,6 +90,11 @@ class FireworksAIConfig(OpenAIGPTConfig):
) -> dict:
supported_openai_params = self.get_supported_openai_params(model=model)
is_tools_set = any(
param == "tools" and value is not None
for param, value in non_default_params.items()
)
for param, value in non_default_params.items():
if param == "tool_choice":
if value == "required":
@ -98,18 +103,30 @@ class FireworksAIConfig(OpenAIGPTConfig):
else:
# pass through the value of tool choice
optional_params["tool_choice"] = value
elif (
param == "response_format" and value.get("type", None) == "json_schema"
):
elif param == "response_format":
if (
is_tools_set
): # fireworks ai doesn't support tools and response_format together
optional_params = self._add_response_format_to_tools(
optional_params=optional_params,
value=value,
is_response_format_supported=False,
enforce_tool_choice=False, # tools and response_format are both set, don't enforce tool_choice
)
elif "json_schema" in value:
optional_params["response_format"] = {
"type": "json_object",
"schema": value["json_schema"]["schema"],
}
else:
optional_params["response_format"] = value
elif param == "max_completion_tokens":
optional_params["max_tokens"] = value
elif param in supported_openai_params:
if value is not None:
optional_params[param] = value
return optional_params
def _add_transform_inline_image_block(

View file

@ -353,7 +353,7 @@ class OllamaConfig(BaseConfig):
def get_complete_url(
self,
api_base: str,
api_base: Optional[str],
model: str,
optional_params: dict,
stream: Optional[bool] = None,
@ -365,6 +365,8 @@ class OllamaConfig(BaseConfig):
Some providers need `model` in `api_base`
"""
if api_base is None:
api_base = "http://localhost:11434"
if api_base.endswith("/api/generate"):
url = api_base
else:

View file

@ -508,6 +508,7 @@ async def ollama_async_streaming(
verbose_logger.exception(
"LiteLLM.ollama(): Exception occured - {}".format(str(e))
)
raise e
async def ollama_acompletion(

View file

@ -263,7 +263,7 @@ class OpenAIGPTConfig(BaseLLMModelInfo, BaseConfig):
def get_complete_url(
self,
api_base: str,
api_base: Optional[str],
model: str,
optional_params: dict,
stream: Optional[bool] = None,
@ -274,6 +274,8 @@ class OpenAIGPTConfig(BaseLLMModelInfo, BaseConfig):
Returns:
str: The complete URL for the API call.
"""
if api_base is None:
api_base = "https://api.openai.com"
endpoint = "chat/completions"
# Remove trailing slash from api_base if present

View file

@ -19,6 +19,7 @@ from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage
from litellm.utils import (
supports_function_calling,
supports_parallel_function_calling,
supports_response_schema,
supports_system_messages,
)
@ -76,14 +77,19 @@ class OpenAIOSeriesConfig(OpenAIGPTConfig):
model, custom_llm_provider
)
_supports_response_schema = supports_response_schema(model, custom_llm_provider)
_supports_parallel_tool_calls = supports_parallel_function_calling(
model, custom_llm_provider
)
if not _supports_function_calling:
non_supported_params.append("tools")
non_supported_params.append("tool_choice")
non_supported_params.append("parallel_tool_calls")
non_supported_params.append("function_call")
non_supported_params.append("functions")
if not _supports_parallel_tool_calls:
non_supported_params.append("parallel_tool_calls")
if not _supports_response_schema:
non_supported_params.append("response_format")

View file

@ -112,6 +112,7 @@ class OpenAIAudioTranscription(OpenAIChatCompletion):
api_base=api_base,
timeout=timeout,
max_retries=max_retries,
client=client,
)
## LOGGING

View file

@ -138,7 +138,7 @@ class ReplicateConfig(BaseConfig):
def get_complete_url(
self,
api_base: str,
api_base: Optional[str],
model: str,
optional_params: dict,
stream: Optional[bool] = None,

View file

@ -433,6 +433,10 @@ class SagemakerLLM(BaseAWSLLM):
"messages": messages,
}
prepared_request = await asyncified_prepare_request(**prepared_request_args)
if model_id is not None: # Fixes https://github.com/BerriAI/litellm/issues/8889
prepared_request.headers.update(
{"X-Amzn-SageMaker-Inference-Component": model_id}
)
completion_stream = await self.make_async_call(
api_base=prepared_request.url,
headers=prepared_request.headers, # type: ignore
@ -511,7 +515,7 @@ class SagemakerLLM(BaseAWSLLM):
# Add model_id as InferenceComponentName header
# boto3 doc: https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html
prepared_request.headers.update(
{"X-Amzn-SageMaker-Inference-Componen": model_id}
{"X-Amzn-SageMaker-Inference-Component": model_id}
)
# make async httpx post request here
try:

View file

@ -11,7 +11,7 @@ from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
class SambanovaConfig(OpenAIGPTConfig):
"""
Reference: https://community.sambanova.ai/t/create-chat-completion-api/
Reference: https://docs.sambanova.ai/cloud/api-reference/
Below are the parameters:
"""

View file

@ -10,7 +10,10 @@ from litellm.llms.custom_httpx.http_handler import (
)
from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM
from litellm.types.llms.openai import Batch, CreateBatchRequest
from litellm.types.llms.vertex_ai import VertexAIBatchPredictionJob
from litellm.types.llms.vertex_ai import (
VERTEX_CREDENTIALS_TYPES,
VertexAIBatchPredictionJob,
)
from .transformation import VertexAIBatchTransformation
@ -25,7 +28,7 @@ class VertexAIBatchPrediction(VertexLLM):
_is_async: bool,
create_batch_data: CreateBatchRequest,
api_base: Optional[str],
vertex_credentials: Optional[str],
vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
vertex_project: Optional[str],
vertex_location: Optional[str],
timeout: Union[float, httpx.Timeout],
@ -130,7 +133,7 @@ class VertexAIBatchPrediction(VertexLLM):
_is_async: bool,
batch_id: str,
api_base: Optional[str],
vertex_credentials: Optional[str],
vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
vertex_project: Optional[str],
vertex_location: Optional[str],
timeout: Union[float, httpx.Timeout],

View file

@ -9,6 +9,7 @@ from litellm.integrations.gcs_bucket.gcs_bucket_base import (
)
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
from litellm.types.llms.openai import CreateFileRequest, FileObject
from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
from .transformation import VertexAIFilesTransformation
@ -34,7 +35,7 @@ class VertexAIFilesHandler(GCSBucketBase):
self,
create_file_data: CreateFileRequest,
api_base: Optional[str],
vertex_credentials: Optional[str],
vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
vertex_project: Optional[str],
vertex_location: Optional[str],
timeout: Union[float, httpx.Timeout],
@ -70,7 +71,7 @@ class VertexAIFilesHandler(GCSBucketBase):
_is_async: bool,
create_file_data: CreateFileRequest,
api_base: Optional[str],
vertex_credentials: Optional[str],
vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
vertex_project: Optional[str],
vertex_location: Optional[str],
timeout: Union[float, httpx.Timeout],

View file

@ -13,6 +13,7 @@ from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import Ver
from litellm.types.fine_tuning import OpenAIFineTuningHyperparameters
from litellm.types.llms.openai import FineTuningJobCreate
from litellm.types.llms.vertex_ai import (
VERTEX_CREDENTIALS_TYPES,
FineTuneHyperparameters,
FineTuneJobCreate,
FineTunesupervisedTuningSpec,
@ -222,7 +223,7 @@ class VertexFineTuningAPI(VertexLLM):
create_fine_tuning_job_data: FineTuningJobCreate,
vertex_project: Optional[str],
vertex_location: Optional[str],
vertex_credentials: Optional[str],
vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
api_base: Optional[str],
timeout: Union[float, httpx.Timeout],
kwargs: Optional[dict] = None,

View file

@ -40,6 +40,7 @@ from litellm.types.llms.openai import (
ChatCompletionUsageBlock,
)
from litellm.types.llms.vertex_ai import (
VERTEX_CREDENTIALS_TYPES,
Candidates,
ContentType,
FunctionCallingConfig,
@ -930,7 +931,7 @@ class VertexLLM(VertexBase):
client: Optional[AsyncHTTPHandler] = None,
vertex_project: Optional[str] = None,
vertex_location: Optional[str] = None,
vertex_credentials: Optional[str] = None,
vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
gemini_api_key: Optional[str] = None,
extra_headers: Optional[dict] = None,
) -> CustomStreamWrapper:
@ -1018,7 +1019,7 @@ class VertexLLM(VertexBase):
client: Optional[AsyncHTTPHandler] = None,
vertex_project: Optional[str] = None,
vertex_location: Optional[str] = None,
vertex_credentials: Optional[str] = None,
vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
gemini_api_key: Optional[str] = None,
extra_headers: Optional[dict] = None,
) -> Union[ModelResponse, CustomStreamWrapper]:
@ -1123,7 +1124,7 @@ class VertexLLM(VertexBase):
timeout: Optional[Union[float, httpx.Timeout]],
vertex_project: Optional[str],
vertex_location: Optional[str],
vertex_credentials: Optional[str],
vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
gemini_api_key: Optional[str],
litellm_params: dict,
logger_fn=None,

View file

@ -11,6 +11,7 @@ from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
)
from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM
from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
from litellm.types.utils import ImageResponse
@ -44,7 +45,7 @@ class VertexImageGeneration(VertexLLM):
prompt: str,
vertex_project: Optional[str],
vertex_location: Optional[str],
vertex_credentials: Optional[str],
vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
model_response: ImageResponse,
logging_obj: Any,
model: Optional[
@ -139,7 +140,7 @@ class VertexImageGeneration(VertexLLM):
prompt: str,
vertex_project: Optional[str],
vertex_location: Optional[str],
vertex_credentials: Optional[str],
vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
model_response: litellm.ImageResponse,
logging_obj: Any,
model: Optional[

View file

@ -9,6 +9,7 @@ from litellm.llms.custom_httpx.http_handler import (
)
from litellm.llms.openai.openai import HttpxBinaryResponseContent
from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM
from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
class VertexInput(TypedDict, total=False):
@ -45,7 +46,7 @@ class VertexTextToSpeechAPI(VertexLLM):
logging_obj,
vertex_project: Optional[str],
vertex_location: Optional[str],
vertex_credentials: Optional[str],
vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
api_base: Optional[str],
timeout: Union[float, httpx.Timeout],
model: str,

View file

@ -160,6 +160,7 @@ class VertexAIPartnerModels(VertexBase):
url=default_api_base,
)
if "codestral" in model or "mistral" in model:
model = model.split("@")[0]
if "codestral" in model and litellm_params.get("text_completion") is True:

View file

@ -41,7 +41,7 @@ class VertexEmbedding(VertexBase):
client: Optional[Union[AsyncHTTPHandler, HTTPHandler]] = None,
vertex_project: Optional[str] = None,
vertex_location: Optional[str] = None,
vertex_credentials: Optional[str] = None,
vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
gemini_api_key: Optional[str] = None,
extra_headers: Optional[dict] = None,
) -> EmbeddingResponse:
@ -148,7 +148,7 @@ class VertexEmbedding(VertexBase):
client: Optional[AsyncHTTPHandler] = None,
vertex_project: Optional[str] = None,
vertex_location: Optional[str] = None,
vertex_credentials: Optional[str] = None,
vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
gemini_api_key: Optional[str] = None,
extra_headers: Optional[dict] = None,
encoding=None,

View file

@ -12,6 +12,7 @@ from litellm._logging import verbose_logger
from litellm.litellm_core_utils.asyncify import asyncify
from litellm.llms.base import BaseLLM
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
from .common_utils import _get_gemini_url, _get_vertex_url, all_gemini_url_modes
@ -34,7 +35,7 @@ class VertexBase(BaseLLM):
return vertex_region or "us-central1"
def load_auth(
self, credentials: Optional[str], project_id: Optional[str]
self, credentials: Optional[VERTEX_CREDENTIALS_TYPES], project_id: Optional[str]
) -> Tuple[Any, str]:
import google.auth as google_auth
from google.auth import identity_pool
@ -42,9 +43,10 @@ class VertexBase(BaseLLM):
Request, # type: ignore[import-untyped]
)
if credentials is not None and isinstance(credentials, str):
if credentials is not None:
import google.oauth2.service_account
if isinstance(credentials, str):
verbose_logger.debug(
"Vertex: Loading vertex credentials from %s", credentials
)
@ -66,6 +68,12 @@ class VertexBase(BaseLLM):
credentials
)
)
elif isinstance(credentials, dict):
json_obj = credentials
else:
raise ValueError(
"Invalid credentials type: {}".format(type(credentials))
)
# Check if the JSON object contains Workload Identity Federation configuration
if "type" in json_obj and json_obj["type"] == "external_account":
@ -109,7 +117,7 @@ class VertexBase(BaseLLM):
def _ensure_access_token(
self,
credentials: Optional[str],
credentials: Optional[VERTEX_CREDENTIALS_TYPES],
project_id: Optional[str],
custom_llm_provider: Literal[
"vertex_ai", "vertex_ai_beta", "gemini"
@ -202,7 +210,7 @@ class VertexBase(BaseLLM):
gemini_api_key: Optional[str],
vertex_project: Optional[str],
vertex_location: Optional[str],
vertex_credentials: Optional[str],
vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
stream: Optional[bool],
custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"],
api_base: Optional[str],
@ -253,7 +261,7 @@ class VertexBase(BaseLLM):
async def _ensure_access_token_async(
self,
credentials: Optional[str],
credentials: Optional[VERTEX_CREDENTIALS_TYPES],
project_id: Optional[str],
custom_llm_provider: Literal[
"vertex_ai", "vertex_ai_beta", "gemini"

View file

@ -80,7 +80,7 @@ class IBMWatsonXChatConfig(IBMWatsonXMixin, OpenAIGPTConfig):
def get_complete_url(
self,
api_base: str,
api_base: Optional[str],
model: str,
optional_params: dict,
stream: Optional[bool] = None,

View file

@ -315,7 +315,7 @@ class IBMWatsonXAIConfig(IBMWatsonXMixin, BaseConfig):
def get_complete_url(
self,
api_base: str,
api_base: Optional[str],
model: str,
optional_params: dict,
stream: Optional[bool] = None,

View file

@ -94,7 +94,7 @@ from litellm.utils import (
read_config_args,
supports_httpx_timeout,
token_counter,
validate_chat_completion_messages,
validate_and_fix_openai_messages,
validate_chat_completion_tool_choice,
)
@ -166,6 +166,7 @@ from .llms.vertex_ai.vertex_model_garden.main import VertexAIModelGardenModels
from .llms.vllm.completion import handler as vllm_handler
from .llms.watsonx.chat.handler import WatsonXChatHandler
from .llms.watsonx.common_utils import IBMWatsonXMixin
from .types.llms.anthropic import AnthropicThinkingParam
from .types.llms.openai import (
ChatCompletionAssistantMessage,
ChatCompletionAudioParam,
@ -341,6 +342,7 @@ async def acompletion(
model_list: Optional[list] = None, # pass in a list of api_base,keys, etc.
extra_headers: Optional[dict] = None,
# Optional liteLLM function params
thinking: Optional[AnthropicThinkingParam] = None,
**kwargs,
) -> Union[ModelResponse, CustomStreamWrapper]:
"""
@ -431,6 +433,7 @@ async def acompletion(
"reasoning_effort": reasoning_effort,
"extra_headers": extra_headers,
"acompletion": True, # assuming this is a required parameter
"thinking": thinking,
}
if custom_llm_provider is None:
_, custom_llm_provider, _, _ = get_llm_provider(
@ -800,6 +803,7 @@ def completion( # type: ignore # noqa: PLR0915
api_key: Optional[str] = None,
model_list: Optional[list] = None, # pass in a list of api_base,keys, etc.
# Optional liteLLM function params
thinking: Optional[AnthropicThinkingParam] = None,
**kwargs,
) -> Union[ModelResponse, CustomStreamWrapper]:
"""
@ -851,7 +855,7 @@ def completion( # type: ignore # noqa: PLR0915
if model is None:
raise ValueError("model param not passed in.")
# validate messages
messages = validate_chat_completion_messages(messages=messages)
messages = validate_and_fix_openai_messages(messages=messages)
# validate tool_choice
tool_choice = validate_chat_completion_tool_choice(tool_choice=tool_choice)
######### unpacking kwargs #####################
@ -1106,6 +1110,7 @@ def completion( # type: ignore # noqa: PLR0915
parallel_tool_calls=parallel_tool_calls,
messages=messages,
reasoning_effort=reasoning_effort,
thinking=thinking,
**non_default_params,
)
@ -3409,6 +3414,7 @@ def embedding( # noqa: PLR0915
or custom_llm_provider == "openai"
or custom_llm_provider == "together_ai"
or custom_llm_provider == "nvidia_nim"
or custom_llm_provider == "litellm_proxy"
):
api_base = (
api_base
@ -3485,7 +3491,8 @@ def embedding( # noqa: PLR0915
# set API KEY
if api_key is None:
api_key = (
litellm.api_key
api_key
or litellm.api_key
or litellm.openai_like_key
or get_secret_str("OPENAI_LIKE_API_KEY")
)
@ -4596,7 +4603,10 @@ def image_generation( # noqa: PLR0915
client=client,
headers=headers,
)
elif custom_llm_provider == "openai":
elif (
custom_llm_provider == "openai"
or custom_llm_provider in litellm.openai_compatible_providers
):
model_response = openai_chat_completions.image_generation(
model=model,
prompt=prompt,
@ -5042,8 +5052,7 @@ def transcription(
)
elif (
custom_llm_provider == "openai"
or custom_llm_provider == "groq"
or custom_llm_provider == "fireworks_ai"
or custom_llm_provider in litellm.openai_compatible_providers
):
api_base = (
api_base
@ -5201,7 +5210,10 @@ def speech(
custom_llm_provider=custom_llm_provider,
)
response: Optional[HttpxBinaryResponseContent] = None
if custom_llm_provider == "openai":
if (
custom_llm_provider == "openai"
or custom_llm_provider in litellm.openai_compatible_providers
):
if voice is None or not (isinstance(voice, str)):
raise litellm.BadRequestError(
message="'voice' is required to be passed as a string for OpenAI TTS",

View file

@ -76,6 +76,44 @@
"supports_system_messages": true,
"supports_tool_choice": true
},
"gpt-4.5-preview": {
"max_tokens": 16384,
"max_input_tokens": 128000,
"max_output_tokens": 16384,
"input_cost_per_token": 0.000075,
"output_cost_per_token": 0.00015,
"input_cost_per_token_batches": 0.0000375,
"output_cost_per_token_batches": 0.000075,
"cache_read_input_token_cost": 0.0000375,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_prompt_caching": true,
"supports_system_messages": true,
"supports_tool_choice": true
},
"gpt-4.5-preview-2025-02-27": {
"max_tokens": 16384,
"max_input_tokens": 128000,
"max_output_tokens": 16384,
"input_cost_per_token": 0.000075,
"output_cost_per_token": 0.00015,
"input_cost_per_token_batches": 0.0000375,
"output_cost_per_token_batches": 0.000075,
"cache_read_input_token_cost": 0.0000375,
"litellm_provider": "openai",
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_prompt_caching": true,
"supports_system_messages": true,
"supports_tool_choice": true
},
"gpt-4o-audio-preview": {
"max_tokens": 16384,
"max_input_tokens": 128000,
@ -1409,7 +1447,7 @@
"mode": "chat",
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"deprecation_date": "2025-03-31",
"deprecation_date": "2025-05-31",
"supports_tool_choice": true
},
"azure/gpt-3.5-turbo-0125": {
@ -1732,6 +1770,19 @@
"source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice",
"supports_tool_choice": true
},
"azure_ai/Phi-4": {
"max_tokens": 4096,
"max_input_tokens": 128000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000000125,
"output_cost_per_token": 0.0000005,
"litellm_provider": "azure_ai",
"mode": "chat",
"supports_vision": false,
"source": "https://techcommunity.microsoft.com/blog/machinelearningblog/affordable-innovation-unveiling-the-pricing-of-phi-3-slms-on-models-as-a-service/4156495",
"supports_function_calling": true,
"supports_tool_choice": true
},
"azure_ai/Phi-3.5-mini-instruct": {
"max_tokens": 4096,
"max_input_tokens": 128000,
@ -2731,6 +2782,25 @@
"supports_tool_choice": true
},
"claude-3-5-haiku-20241022": {
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.0000008,
"output_cost_per_token": 0.000004,
"cache_creation_input_token_cost": 0.000001,
"cache_read_input_token_cost": 0.0000008,
"litellm_provider": "anthropic",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"tool_use_system_prompt_tokens": 264,
"supports_assistant_prefill": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
"deprecation_date": "2025-10-01",
"supports_tool_choice": true
},
"claude-3-5-haiku-latest": {
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
@ -2741,6 +2811,7 @@
"litellm_provider": "anthropic",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"tool_use_system_prompt_tokens": 264,
"supports_assistant_prefill": true,
"supports_prompt_caching": true,
@ -2748,6 +2819,25 @@
"deprecation_date": "2025-10-01",
"supports_tool_choice": true
},
"claude-3-opus-latest": {
"max_tokens": 4096,
"max_input_tokens": 200000,
"max_output_tokens": 4096,
"input_cost_per_token": 0.000015,
"output_cost_per_token": 0.000075,
"cache_creation_input_token_cost": 0.00001875,
"cache_read_input_token_cost": 0.0000015,
"litellm_provider": "anthropic",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"tool_use_system_prompt_tokens": 395,
"supports_assistant_prefill": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
"deprecation_date": "2025-03-01",
"supports_tool_choice": true
},
"claude-3-opus-20240229": {
"max_tokens": 4096,
"max_input_tokens": 200000,
@ -2784,6 +2874,25 @@
"deprecation_date": "2025-07-21",
"supports_tool_choice": true
},
"claude-3-5-sonnet-latest": {
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000015,
"cache_creation_input_token_cost": 0.00000375,
"cache_read_input_token_cost": 0.0000003,
"litellm_provider": "anthropic",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"tool_use_system_prompt_tokens": 159,
"supports_assistant_prefill": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
"deprecation_date": "2025-06-01",
"supports_tool_choice": true
},
"claude-3-5-sonnet-20240620": {
"max_tokens": 8192,
"max_input_tokens": 200000,
@ -2803,6 +2912,25 @@
"deprecation_date": "2025-06-01",
"supports_tool_choice": true
},
"claude-3-7-sonnet-latest": {
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000015,
"cache_creation_input_token_cost": 0.00000375,
"cache_read_input_token_cost": 0.0000003,
"litellm_provider": "anthropic",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"tool_use_system_prompt_tokens": 159,
"supports_assistant_prefill": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
"deprecation_date": "2025-06-01",
"supports_tool_choice": true
},
"claude-3-7-sonnet-20250219": {
"max_tokens": 8192,
"max_input_tokens": 200000,
@ -2819,7 +2947,7 @@
"supports_assistant_prefill": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
"deprecation_date": "2025-06-01",
"deprecation_date": "2026-02-01",
"supports_tool_choice": true
},
"claude-3-5-sonnet-20241022": {
@ -4074,7 +4202,7 @@
"supports_assistant_prefill": true,
"supports_tool_choice": true
},
"vertex_ai/claude-3-7-sonnet-20250219": {
"vertex_ai/claude-3-7-sonnet@20250219": {
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
@ -5495,6 +5623,35 @@
"tool_use_system_prompt_tokens": 159,
"supports_tool_choice": true
},
"openrouter/anthropic/claude-3.7-sonnet": {
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000015,
"input_cost_per_image": 0.0048,
"litellm_provider": "openrouter",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"tool_use_system_prompt_tokens": 159,
"supports_assistant_prefill": true,
"supports_tool_choice": true
},
"openrouter/anthropic/claude-3.7-sonnet:beta": {
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000015,
"input_cost_per_image": 0.0048,
"litellm_provider": "openrouter",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"tool_use_system_prompt_tokens": 159,
"supports_tool_choice": true
},
"openrouter/anthropic/claude-3-sonnet": {
"max_tokens": 200000,
"input_cost_per_token": 0.000003,
@ -6468,6 +6625,21 @@
"supports_response_schema": true,
"supports_tool_choice": true
},
"us.anthropic.claude-3-7-sonnet-20250219-v1:0": {
"max_tokens": 8192,
"max_input_tokens": 200000,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000015,
"litellm_provider": "bedrock_converse",
"mode": "chat",
"supports_function_calling": true,
"supports_vision": true,
"supports_assistant_prefill": true,
"supports_prompt_caching": true,
"supports_response_schema": true,
"supports_tool_choice": true
},
"us.anthropic.claude-3-haiku-20240307-v1:0": {
"max_tokens": 4096,
"max_input_tokens": 200000,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/2a6af5dc23d92a9a.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[35319,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"441\",\"static/chunks/441-79926bf2b9d89e04.js\",\"261\",\"static/chunks/261-cb27c20c4f8ec4c6.js\",\"899\",\"static/chunks/899-354f59ecde307dfa.js\",\"678\",\"static/chunks/678-58bcfc3337902198.js\",\"250\",\"static/chunks/250-fd088aaa064b7d46.js\",\"699\",\"static/chunks/699-a194d60126b95923.js\",\"931\",\"static/chunks/app/page-84c68f24f2d4d77b.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"Z74g7wOKfx1z1d_BuB0ip\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/2a6af5dc23d92a9a.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/f41c66e22715ab00.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[89076,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"441\",\"static/chunks/441-79926bf2b9d89e04.js\",\"261\",\"static/chunks/261-cb27c20c4f8ec4c6.js\",\"899\",\"static/chunks/899-354f59ecde307dfa.js\",\"914\",\"static/chunks/914-000d10374f86fc1a.js\",\"250\",\"static/chunks/250-8b26aa68cd90cbb2.js\",\"699\",\"static/chunks/699-6b82f8e7b98ca1a3.js\",\"931\",\"static/chunks/app/page-fbe63e2a496641d2.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"8I5x-IqExlZLRs0oeiz6b\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f41c66e22715ab00.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[35319,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","899","static/chunks/899-354f59ecde307dfa.js","678","static/chunks/678-58bcfc3337902198.js","250","static/chunks/250-fd088aaa064b7d46.js","699","static/chunks/699-a194d60126b95923.js","931","static/chunks/app/page-84c68f24f2d4d77b.js"],"default",1]
3:I[89076,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","899","static/chunks/899-354f59ecde307dfa.js","914","static/chunks/914-000d10374f86fc1a.js","250","static/chunks/250-8b26aa68cd90cbb2.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","931","static/chunks/app/page-fbe63e2a496641d2.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["Z74g7wOKfx1z1d_BuB0ip",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/2a6af5dc23d92a9a.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["8I5x-IqExlZLRs0oeiz6b",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[52829,["441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","250","static/chunks/250-fd088aaa064b7d46.js","699","static/chunks/699-a194d60126b95923.js","418","static/chunks/app/model_hub/page-6f97b95f1023b0e9.js"],"default",1]
3:I[52829,["441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","250","static/chunks/250-8b26aa68cd90cbb2.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","418","static/chunks/app/model_hub/page-6f97b95f1023b0e9.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["Z74g7wOKfx1z1d_BuB0ip",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/2a6af5dc23d92a9a.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["8I5x-IqExlZLRs0oeiz6b",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","441","static/chunks/441-79926bf2b9d89e04.js","899","static/chunks/899-354f59ecde307dfa.js","250","static/chunks/250-fd088aaa064b7d46.js","461","static/chunks/app/onboarding/page-801b31bb95fa3d1c.js"],"default",1]
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","441","static/chunks/441-79926bf2b9d89e04.js","899","static/chunks/899-354f59ecde307dfa.js","250","static/chunks/250-8b26aa68cd90cbb2.js","461","static/chunks/app/onboarding/page-f2e9aa9e77b66520.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["Z74g7wOKfx1z1d_BuB0ip",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/2a6af5dc23d92a9a.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["8I5x-IqExlZLRs0oeiz6b",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@ -1,9 +1,5 @@
model_list:
- model_name: anthropic/claude-3-7-sonnet-20250219
litellm_params:
model: anthropic/claude-3-7-sonnet-20250219
api_key: os.environ/ANTHROPIC_API_KEY
- model_name: gpt-4
- model_name: claude-3.7
litellm_params:
model: openai/gpt-3.5-turbo
api_key: os.environ/OPENAI_API_KEY
@ -14,3 +10,19 @@ model_list:
- model_name: deepseek-r1-api
litellm_params:
model: deepseek/deepseek-reasoner
- model_name: cohere.embed-english-v3
litellm_params:
model: bedrock/cohere.embed-english-v3
api_key: os.environ/COHERE_API_KEY
- model_name: bedrock-claude-3-7
litellm_params:
model: bedrock/invoke/us.anthropic.claude-3-7-sonnet-20250219-v1:0
- model_name: bedrock-claude-3-5-sonnet
litellm_params:
model: bedrock/invoke/us.anthropic.claude-3-5-sonnet-20240620-v1:0
- model_name: bedrock-nova
litellm_params:
model: bedrock/us.amazon.nova-pro-v1:0
litellm_settings:
callbacks: ["langfuse"]

View file

@ -26,6 +26,8 @@ from litellm.types.utils import (
ModelResponse,
ProviderField,
StandardCallbackDynamicParams,
StandardLoggingPayloadErrorInformation,
StandardLoggingPayloadStatus,
StandardPassThroughResponseObject,
TextCompletionResponse,
)
@ -610,6 +612,8 @@ class GenerateKeyResponse(KeyRequestBase):
token_id: Optional[str] = None
litellm_budget_table: Optional[Any] = None
token: Optional[str] = None
created_by: Optional[str] = None
updated_by: Optional[str] = None
@model_validator(mode="before")
@classmethod
@ -1387,7 +1391,9 @@ class LiteLLM_VerificationToken(LiteLLMPydanticObjectBase):
litellm_budget_table: Optional[dict] = None
org_id: Optional[str] = None # org id for a given key
created_at: Optional[datetime] = None
created_by: Optional[str] = None
updated_at: Optional[datetime] = None
updated_by: Optional[str] = None
model_config = ConfigDict(protected_namespaces=())
@ -1574,6 +1580,10 @@ class LiteLLM_UserTableFiltered(BaseModel): # done to avoid exposing sensitive
user_email: str
class LiteLLM_UserTableWithKeyCount(LiteLLM_UserTable):
key_count: int = 0
class LiteLLM_EndUserTable(LiteLLMPydanticObjectBase):
user_id: str
blocked: bool
@ -1704,6 +1714,7 @@ class WebhookEvent(CallInfo):
class SpecialModelNames(enum.Enum):
all_team_models = "all-team-models"
all_proxy_models = "all-proxy-models"
no_default_models = "no-default-models"
class InvitationNew(LiteLLMPydanticObjectBase):
@ -1846,6 +1857,9 @@ class SpendLogsMetadata(TypedDict):
] # special param to log k,v pairs to spendlogs for a call
requester_ip_address: Optional[str]
applied_guardrails: Optional[List[str]]
status: StandardLoggingPayloadStatus
proxy_server_request: Optional[str]
error_information: Optional[StandardLoggingPayloadErrorInformation]
class SpendLogsPayload(TypedDict):

View file

@ -1116,6 +1116,14 @@ async def can_user_call_model(
if user_object is None:
return True
if SpecialModelNames.no_default_models.value in user_object.models:
raise ProxyException(
message=f"User not allowed to access model. No default model access, only team models allowed. Tried to access {model}",
type=ProxyErrorTypes.key_model_access_denied,
param="model",
code=status.HTTP_401_UNAUTHORIZED,
)
return await _can_object_call_model(
model=model,
llm_router=llm_router,
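
The check above short-circuits `can_user_call_model` whenever the user's `models` list contains the special `no-default-models` entry, so such users can only call models granted via a team. A sketch of creating a user in that state against a running proxy, using `requests` with a placeholder URL and admin key:

```
import requests

response = requests.post(
    "http://0.0.0.0:4000/user/new",  # placeholder proxy URL
    headers={"Authorization": "Bearer sk-1234"},  # placeholder admin key
    json={
        "user_email": "dev@example.com",
        "models": ["no-default-models"],  # SpecialModelNames.no_default_models
    },
    timeout=30,
)
print(response.json())
```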

View file

@ -1,5 +1,6 @@
# What is this?
## Common checks for /v1/models and `/model/info`
import copy
from typing import Dict, List, Optional, Set
import litellm
@ -30,7 +31,7 @@ def get_provider_models(provider: str) -> Optional[List[str]]:
return get_valid_models()
if provider in litellm.models_by_provider:
provider_models = litellm.models_by_provider[provider]
provider_models = copy.deepcopy(litellm.models_by_provider[provider])
for idx, _model in enumerate(provider_models):
if provider not in _model:
provider_models[idx] = f"{provider}/{_model}"

View file

@ -240,3 +240,18 @@ class RouteChecks:
RouteChecks._route_matches_pattern(route=route, pattern=allowed_route)
for allowed_route in allowed_routes
) # Check pattern match
@staticmethod
def _is_assistants_api_request(request: Request) -> bool:
"""
Returns True if `thread` or `assistant` is in the request path
Args:
request (Request): The request object
Returns:
bool: True if `thread` or `assistant` is in the request path, False otherwise
"""
if "thread" in request.url.path or "assistant" in request.url.path:
return True
return False

View file

@ -8,6 +8,7 @@ Returns a UserAPIKeyAuth object if the API key is valid
"""
import asyncio
import re
import secrets
from datetime import datetime, timezone
from typing import Optional, cast
@ -279,6 +280,21 @@ def get_rbac_role(jwt_handler: JWTHandler, scopes: List[str]) -> str:
return LitellmUserRoles.TEAM
def get_model_from_request(request_data: dict, route: str) -> Optional[str]:
# First try to get model from request_data
model = request_data.get("model")
# If model not in request_data, try to extract from route
if model is None:
# Parse model from route that follows the pattern /openai/deployments/{model}/*
match = re.match(r"/openai/deployments/([^/]+)", route)
if match:
model = match.group(1)
return model
async def _user_api_key_auth_builder( # noqa: PLR0915
request: Request,
api_key: str,
@ -807,7 +823,7 @@ async def _user_api_key_auth_builder( # noqa: PLR0915
# the validation will occur when checking the team has access to this model
pass
else:
model = request_data.get("model", None)
model = get_model_from_request(request_data, route)
fallback_models = cast(
Optional[List[ALL_FALLBACK_MODEL_VALUES]],
request_data.get("fallbacks", None),

View file

@ -42,7 +42,26 @@ async def _read_request_body(request: Optional[Request]) -> Dict:
if not body:
parsed_body = {}
else:
try:
parsed_body = orjson.loads(body)
except orjson.JSONDecodeError:
# Fall back to the standard json module which is more forgiving
# First decode bytes to string if needed
body_str = body.decode("utf-8") if isinstance(body, bytes) else body
# Replace invalid surrogate pairs
import re
# This regex finds incomplete surrogate pairs
body_str = re.sub(
r"[\uD800-\uDBFF](?![\uDC00-\uDFFF])", "", body_str
)
# This regex finds low surrogates without high surrogates
body_str = re.sub(
r"(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]", "", body_str
)
parsed_body = json.loads(body_str)
# Cache the parsed result
_safe_set_request_parsed_body(request=request, parsed_body=parsed_body)
@ -62,8 +81,8 @@ async def _read_request_body(request: Optional[Request]) -> Dict:
def _safe_get_request_parsed_body(request: Optional[Request]) -> Optional[dict]:
if request is None:
return None
if hasattr(request, "state") and hasattr(request.state, "parsed_body"):
return request.state.parsed_body
if hasattr(request, "scope") and "parsed_body" in request.scope:
return request.scope["parsed_body"]
return None
@ -74,7 +93,7 @@ def _safe_set_request_parsed_body(
try:
if request is None:
return
request.state.parsed_body = parsed_body
request.scope["parsed_body"] = parsed_body
except Exception as e:
verbose_proxy_logger.debug(
"Unexpected error setting request parsed body - {}".format(e)

View file

@ -64,10 +64,10 @@ def log_db_metrics(func):
# in litellm custom callbacks kwargs is passed as arg[0]
# https://docs.litellm.ai/docs/observability/custom_callback#callback-functions
args is not None
and len(args) > 0
and isinstance(args[0], dict)
and len(args) > 1
and isinstance(args[1], dict)
):
passed_kwargs = args[0]
passed_kwargs = args[1]
parent_otel_span = _get_parent_otel_span_from_kwargs(
kwargs=passed_kwargs
)

View file

@ -1,25 +1,91 @@
import asyncio
import traceback
from typing import Optional, Union, cast
from datetime import datetime
from typing import Any, Optional, Union, cast
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.litellm_core_utils.core_helpers import (
_get_parent_otel_span_from_kwargs,
get_litellm_metadata_from_kwargs,
)
from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.auth.auth_checks import log_db_metrics
from litellm.types.utils import StandardLoggingPayload
from litellm.types.utils import (
StandardLoggingPayload,
StandardLoggingUserAPIKeyMetadata,
)
from litellm.utils import get_end_user_id_for_cost_tracking
@log_db_metrics
async def _PROXY_track_cost_callback(
class _ProxyDBLogger(CustomLogger):
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
await self._PROXY_track_cost_callback(
kwargs, response_obj, start_time, end_time
)
async def async_post_call_failure_hook(
self,
request_data: dict,
original_exception: Exception,
user_api_key_dict: UserAPIKeyAuth,
):
from litellm.proxy.proxy_server import update_database
_metadata = dict(
StandardLoggingUserAPIKeyMetadata(
user_api_key_hash=user_api_key_dict.api_key,
user_api_key_alias=user_api_key_dict.key_alias,
user_api_key_user_email=user_api_key_dict.user_email,
user_api_key_user_id=user_api_key_dict.user_id,
user_api_key_team_id=user_api_key_dict.team_id,
user_api_key_org_id=user_api_key_dict.org_id,
user_api_key_team_alias=user_api_key_dict.team_alias,
user_api_key_end_user_id=user_api_key_dict.end_user_id,
)
)
_metadata["user_api_key"] = user_api_key_dict.api_key
_metadata["status"] = "failure"
_metadata["error_information"] = (
StandardLoggingPayloadSetup.get_error_information(
original_exception=original_exception,
)
)
existing_metadata: dict = request_data.get("metadata", None) or {}
existing_metadata.update(_metadata)
if "litellm_params" not in request_data:
request_data["litellm_params"] = {}
request_data["litellm_params"]["proxy_server_request"] = (
request_data.get("proxy_server_request") or {}
)
request_data["litellm_params"]["metadata"] = existing_metadata
await update_database(
token=user_api_key_dict.api_key,
response_cost=0.0,
user_id=user_api_key_dict.user_id,
end_user_id=user_api_key_dict.end_user_id,
team_id=user_api_key_dict.team_id,
kwargs=request_data,
completion_response=original_exception,
start_time=datetime.now(),
end_time=datetime.now(),
org_id=user_api_key_dict.org_id,
)
@log_db_metrics
async def _PROXY_track_cost_callback(
self,
kwargs, # kwargs to completion
completion_response: litellm.ModelResponse, # response from completion
completion_response: Optional[
Union[litellm.ModelResponse, Any]
], # response from completion
start_time=None,
end_time=None, # start/end time for completion
):
):
from litellm.proxy.proxy_server import (
prisma_client,
proxy_logging_obj,
@ -132,7 +198,9 @@ async def _PROXY_track_cost_callback(
failing_model=model,
)
)
verbose_proxy_logger.exception("Error in tracking cost callback - %s", str(e))
verbose_proxy_logger.exception(
"Error in tracking cost callback - %s", str(e)
)
def _should_track_cost_callback(
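
The hunk above folds proxy cost tracking and failure accounting into a `CustomLogger` subclass (`_ProxyDBLogger`) registered with the proxy. A minimal sketch of the same callback pattern for a user-defined logger, assuming the documented `CustomLogger` hooks and `litellm.callbacks` registration (the class and printed fields are illustrative, not the proxy's logger):

```
import litellm
from litellm.integrations.custom_logger import CustomLogger


class SpendAuditLogger(CustomLogger):
    """Illustrative logger, not the proxy's _ProxyDBLogger."""

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        # litellm populates response_cost for successful calls it can price.
        print(f"success: model={kwargs.get('model')} cost={kwargs.get('response_cost')}")

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        print(f"failure: model={kwargs.get('model')} error={kwargs.get('exception')}")


litellm.callbacks = [SpendAuditLogger()]
```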

View file

@ -17,6 +17,7 @@ from litellm.proxy._types import (
TeamCallbackMetadata,
UserAPIKeyAuth,
)
from litellm.proxy.auth.route_checks import RouteChecks
from litellm.router import Router
from litellm.types.llms.anthropic import ANTHROPIC_API_HEADERS
from litellm.types.services import ServiceTypes
@ -59,7 +60,7 @@ def _get_metadata_variable_name(request: Request) -> str:
For ALL other endpoints we call this "metadata
"""
if "thread" in request.url.path or "assistant" in request.url.path:
if RouteChecks._is_assistants_api_request(request):
return "litellm_metadata"
if "batches" in request.url.path:
return "litellm_metadata"

View file

@ -127,7 +127,7 @@ async def new_user(
- user_role: Optional[str] - Specify a user role - "proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer", "team", "customer". Info about each role here: `https://github.com/BerriAI/litellm/litellm/proxy/_types.py#L20`
- max_budget: Optional[float] - Specify max budget for a given user.
- budget_duration: Optional[str] - Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"), months ("1mo").
    - models: Optional[list] - Model names the user is allowed to call (if empty, the key is allowed to call all models). Set to ['no-default-models'] to block all direct model access, restricting the user to team-based model access only (example request body below).
- tpm_limit: Optional[int] - Specify tpm limit for a given user (Tokens per minute)
- rpm_limit: Optional[int] - Specify rpm limit for a given user (Requests per minute)
- auto_create_key: bool - Default=True. Flag used for returning a key as part of the /user/new response
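A hedged example of a /user/new request body that relies solely on team-granted models (values are illustrative):

```
# Illustrative /user/new payload: the user gets no personal model access and can
# only call models granted through their teams.
new_user_body = {
    "user_email": "user@example.com",
    "user_role": "internal_user",
    "models": ["no-default-models"],
}
```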
@@ -753,6 +753,9 @@ async def get_users(
role: Optional[str] = fastapi.Query(
default=None, description="Filter users by role"
),
user_ids: Optional[str] = fastapi.Query(
default=None, description="Get list of users by user_ids"
),
page: int = fastapi.Query(default=1, ge=1, description="Page number"),
page_size: int = fastapi.Query(
default=25, ge=1, le=100, description="Number of items per page"
@@ -770,12 +773,19 @@ async def get_users(
- proxy_admin_viewer
- internal_user
- internal_user_viewer
user_ids: Optional[str]
        Comma-separated list of user_ids to filter the results by.
page: int
The page number to return
page_size: int
The number of items per page
Currently - admin-only endpoint.
Example curl:
```
    curl -X GET 'http://0.0.0.0:4000/user/list?user_ids=default_user_id,693c1a4a-1cc0-4c7c-afe8-b5d2c8d52e17' -H 'Authorization: Bearer sk-1234'
```
"""
from litellm.proxy.proxy_server import prisma_client
@@ -787,49 +797,69 @@ async def get_users(
# Calculate skip and take for pagination
skip = (page - 1) * page_size
take = page_size
    # Build where conditions based on provided parameters
    where_conditions: Dict[str, Any] = {}
    if role:
        where_conditions["user_role"] = {
            "contains": role,
            "mode": "insensitive",  # Case-insensitive search
        }
    if user_ids and isinstance(user_ids, str):
        user_id_list = [uid.strip() for uid in user_ids.split(",") if uid.strip()]
        where_conditions["user_id"] = {
            "in": user_id_list,  # Now passing a list of strings as required by Prisma
        }
users: Optional[List[LiteLLM_UserTable]] = (
await prisma_client.db.litellm_usertable.find_many(
where=where_conditions,
skip=skip,
take=page_size,
order={"created_at": "desc"},
)
)
    # Get total count of user rows
    total_count = await prisma_client.db.litellm_usertable.count(
        where=where_conditions  # type: ignore
    )
# Get key count for each user
if users is not None:
user_keys = await prisma_client.db.litellm_verificationtoken.group_by(
by=["user_id"],
count={"user_id": True},
where={"user_id": {"in": [user.user_id for user in users]}},
)
user_key_counts = {
item["user_id"]: item["_count"]["user_id"] for item in user_keys
}
else:
user_key_counts = {}
verbose_proxy_logger.debug(f"Total count of users: {total_count}")
# Calculate total pages
total_pages = -(-total_count // page_size) # Ceiling division
# Prepare response
user_list: List[LiteLLM_UserTableWithKeyCount] = []
if users is not None:
for user in users:
user_list.append(
LiteLLM_UserTableWithKeyCount(
**user.model_dump(), key_count=user_key_counts.get(user.user_id, 0)
)
) # Return full key object
else:
user_list = []
return {
"users": results,
"users": user_list,
"total": total_count,
"page": page,
"page_size": page_size,
View file
@@ -518,6 +518,10 @@ async def generate_key_fn(  # noqa: PLR0915
if "budget_duration" in data_json:
data_json["key_budget_duration"] = data_json.pop("budget_duration", None)
if user_api_key_dict.user_id is not None:
data_json["created_by"] = user_api_key_dict.user_id
data_json["updated_by"] = user_api_key_dict.user_id
# Set tags on the new key
if "tags" in data_json:
from litellm.proxy.proxy_server import premium_user
@@ -1122,6 +1126,8 @@ async def generate_key_helper_fn(  # noqa: PLR0915
organization_id: Optional[str] = None,
table_name: Optional[Literal["key", "user"]] = None,
send_invite_email: Optional[bool] = None,
created_by: Optional[str] = None,
updated_by: Optional[str] = None,
):
from litellm.proxy.proxy_server import (
litellm_proxy_budget_name,
@@ -1225,6 +1231,8 @@ async def generate_key_helper_fn(  # noqa: PLR0915
"model_max_budget": model_max_budget_json,
"budget_id": budget_id,
"blocked": blocked,
"created_by": created_by,
"updated_by": updated_by,
}
if (
View file
@@ -14,6 +14,7 @@ from fastapi import APIRouter, Depends, HTTPException, Request, Response
import litellm
from litellm.constants import BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES
from litellm.proxy._types import *
from litellm.proxy.auth.route_checks import RouteChecks
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
create_pass_through_route,
@@ -397,7 +398,7 @@ async def azure_proxy_route(
)
# Add or update query parameters
azure_api_key = passthrough_endpoint_router.get_credentials(
custom_llm_provider="azure",
custom_llm_provider=litellm.LlmProviders.AZURE.value,
region_name=None,
)
if azure_api_key is None:
@@ -405,13 +406,14 @@ async def azure_proxy_route(
"Required 'AZURE_API_KEY' in environment to make pass-through calls to Azure."
)
    return await BaseOpenAIPassThroughHandler._base_openai_pass_through_handler(
endpoint=endpoint,
request=request,
fastapi_response=fastapi_response,
user_api_key_dict=user_api_key_dict,
base_target_url=base_target_url,
api_key=azure_api_key,
custom_llm_provider=litellm.LlmProviders.AZURE,
)
@@ -431,10 +433,10 @@ async def openai_proxy_route(
"""
base_target_url = "https://api.openai.com"
base_target_url = "https://api.openai.com/"
# Add or update query parameters
openai_api_key = passthrough_endpoint_router.get_credentials(
custom_llm_provider="openai",
custom_llm_provider=litellm.LlmProviders.OPENAI.value,
region_name=None,
)
if openai_api_key is None:
@@ -442,33 +444,40 @@ async def openai_proxy_route(
"Required 'OPENAI_API_KEY' in environment to make pass-through calls to OpenAI."
)
    return await BaseOpenAIPassThroughHandler._base_openai_pass_through_handler(
endpoint=endpoint,
request=request,
fastapi_response=fastapi_response,
user_api_key_dict=user_api_key_dict,
base_target_url=base_target_url,
api_key=openai_api_key,
custom_llm_provider=litellm.LlmProviders.OPENAI,
)
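Taken together, these routes let clients call the provider API through the proxy. A hedged usage sketch — the /openai/... path and the key are assumptions based on the route above, not verified against the deployed route table:

```
# Hypothetical client-side call through the proxy's OpenAI pass-through route.
import httpx

resp = httpx.post(
    "http://0.0.0.0:4000/openai/chat/completions",
    headers={"Authorization": "Bearer sk-1234"},  # LiteLLM proxy key (placeholder)
    json={"model": "gpt-4o-mini", "messages": [{"role": "user", "content": "hi"}]},
)
print(resp.status_code, resp.json())
```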
class BaseOpenAIPassThroughHandler:
@staticmethod
async def _base_openai_pass_through_handler(
endpoint: str,
request: Request,
fastapi_response: Response,
user_api_key_dict: UserAPIKeyAuth,
base_target_url: str,
api_key: str,
        custom_llm_provider: litellm.LlmProviders,
    ):
encoded_endpoint = httpx.URL(endpoint).path
# Ensure endpoint starts with '/' for proper URL construction
if not encoded_endpoint.startswith("/"):
encoded_endpoint = "/" + encoded_endpoint
        # Construct the full target URL by properly joining the base URL and endpoint path
        base_url = httpx.URL(base_target_url)
        updated_url = BaseOpenAIPassThroughHandler._join_url_paths(
base_url=base_url,
path=encoded_endpoint,
custom_llm_provider=custom_llm_provider,
)
## check for streaming
is_streaming_request = False
@@ -479,10 +488,9 @@ async def _base_openai_pass_through_handler(
endpoint_func = create_pass_through_route(
endpoint=endpoint,
target=str(updated_url),
            custom_headers=BaseOpenAIPassThroughHandler._assemble_headers(
                api_key=api_key, request=request
            ),
) # dynamically construct pass-through endpoint based on incoming path
received_value = await endpoint_func(
request,
@@ -493,3 +501,56 @@ async def _base_openai_pass_through_handler(
)
return received_value
@staticmethod
def _append_openai_beta_header(headers: dict, request: Request) -> dict:
"""
Appends the OpenAI-Beta header to the headers if the request is an OpenAI Assistants API request
"""
if (
RouteChecks._is_assistants_api_request(request) is True
and "OpenAI-Beta" not in headers
):
headers["OpenAI-Beta"] = "assistants=v2"
return headers
@staticmethod
def _assemble_headers(api_key: str, request: Request) -> dict:
base_headers = {
"authorization": "Bearer {}".format(api_key),
"api-key": "{}".format(api_key),
}
return BaseOpenAIPassThroughHandler._append_openai_beta_header(
headers=base_headers,
request=request,
)
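For a concrete sense of the output, a hedged example of the headers assembled for an Assistants API pass-through request (values illustrative; the real check runs on the FastAPI Request object):

```
# Illustrative result of _assemble_headers() when the incoming path is an
# assistants/threads route; for non-assistants routes the OpenAI-Beta key is absent.
assembled_headers = {
    "authorization": "Bearer sk-provider-key",
    "api-key": "sk-provider-key",
    "OpenAI-Beta": "assistants=v2",
}
```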
@staticmethod
def _join_url_paths(
base_url: httpx.URL, path: str, custom_llm_provider: litellm.LlmProviders
) -> str:
"""
Properly joins a base URL with a path, preserving any existing path in the base URL.
"""
# Join paths correctly by removing trailing/leading slashes as needed
if not base_url.path or base_url.path == "/":
# If base URL has no path, just use the new path
joined_path_str = str(base_url.copy_with(path=path))
else:
# Otherwise, combine the paths
base_path = base_url.path.rstrip("/")
clean_path = path.lstrip("/")
full_path = f"{base_path}/{clean_path}"
joined_path_str = str(base_url.copy_with(path=full_path))
# Apply OpenAI-specific path handling for both branches
if (
custom_llm_provider == litellm.LlmProviders.OPENAI
and "/v1/" not in joined_path_str
):
# Insert v1 after api.openai.com for OpenAI requests
joined_path_str = joined_path_str.replace(
"api.openai.com/", "api.openai.com/v1/"
)
return joined_path_str
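A hedged restatement of the joining rules with an expected output (standalone sketch, not the class method itself):

```
import httpx

# Condensed version of the rules above: keep any existing base path, then insert
# "/v1" for OpenAI targets that don't already have it.
def join_url_paths(base: str, path: str, is_openai: bool) -> str:
    base_url = httpx.URL(base)
    if not base_url.path or base_url.path == "/":
        joined = str(base_url.copy_with(path=path))
    else:
        joined = str(
            base_url.copy_with(path=f"{base_url.path.rstrip('/')}/{path.lstrip('/')}")
        )
    if is_openai and "/v1/" not in joined:
        joined = joined.replace("api.openai.com/", "api.openai.com/v1/")
    return joined

assert (
    join_url_paths("https://api.openai.com/", "/chat/completions", True)
    == "https://api.openai.com/v1/chat/completions"
)
```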
Some files were not shown because too many files have changed in this diff.