Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)

Merge branch 'main' into litellm_fix_team_model_access_checks

Commit f85d5afd58: 213 changed files with 7650 additions and 1600 deletions
@@ -699,6 +699,7 @@ jobs:
       pip install "pytest-cov==5.0.0"
       pip install "pytest-asyncio==0.21.1"
       pip install "respx==0.21.1"
+      pip install "hypercorn==0.17.3"
       # Run pytest and generate JUnit XML report
   - run:
       name: Run tests
@@ -1981,11 +1982,44 @@ jobs:
   - run:
       name: Wait for app to be ready
       command: dockerize -wait http://localhost:4000 -timeout 5m
+  # Add Ruby installation and testing before the existing Node.js and Python tests
+  - run:
+      name: Install Ruby and Bundler
+      command: |
+        # Import GPG keys first
+        gpg --keyserver hkp://keyserver.ubuntu.com --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3 7D2BAF1CF37B13E2069D6956105BD0E739499BDB || {
+          curl -sSL https://rvm.io/mpapis.asc | gpg --import -
+          curl -sSL https://rvm.io/pkuczynski.asc | gpg --import -
+        }
+
+        # Install Ruby version manager (RVM)
+        curl -sSL https://get.rvm.io | bash -s stable
+
+        # Source RVM from the correct location
+        source $HOME/.rvm/scripts/rvm
+
+        # Install Ruby 3.2.2
+        rvm install 3.2.2
+        rvm use 3.2.2 --default
+
+        # Install latest Bundler
+        gem install bundler
+
+  - run:
+      name: Run Ruby tests
+      command: |
+        source $HOME/.rvm/scripts/rvm
+        cd tests/pass_through_tests/ruby_passthrough_tests
+        bundle install
+        bundle exec rspec
+      no_output_timeout: 30m
   # New steps to run Node.js test
   - run:
       name: Install Node.js
       command: |
+        export DEBIAN_FRONTEND=noninteractive
         curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -
+        sudo apt-get update
         sudo apt-get install -y nodejs
         node --version
         npm --version
.github/workflows/interpret_load_test.py (vendored, 30 lines changed)

@@ -54,27 +54,29 @@ def interpret_results(csv_file):


 def _get_docker_run_command_stable_release(release_version):
     return f"""
 \n\n
 ## Docker Run LiteLLM Proxy

 ```
 docker run \\
 -e STORE_MODEL_IN_DB=True \\
 -p 4000:4000 \\
-ghcr.io/berriai/litellm_stable_release_branch-{release_version}
+ghcr.io/berriai/litellm:litellm_stable_release_branch-{release_version}
+```
 """


 def _get_docker_run_command(release_version):
     return f"""
 \n\n
 ## Docker Run LiteLLM Proxy

 ```
 docker run \\
 -e STORE_MODEL_IN_DB=True \\
 -p 4000:4000 \\
 ghcr.io/berriai/litellm:main-{release_version}
+```
 """
.github/workflows/locustfile.py (vendored, 2 lines changed)

@@ -8,7 +8,7 @@ class MyUser(HttpUser):
     def chat_completion(self):
         headers = {
             "Content-Type": "application/json",
-            "Authorization": "Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
+            "Authorization": "Bearer sk-8N1tLOOyH8TIxwOLahhIVg",
             # Include any additional headers you may need for authentication, etc.
         }
.gitignore (vendored, 2 lines changed)

@@ -77,3 +77,5 @@ litellm/proxy/_experimental/out/404.html
 litellm/proxy/_experimental/out/model_hub.html
 .mypy_cache/*
 litellm/proxy/application.log
+tests/llm_translation/vertex_test_account.json
+tests/llm_translation/test_vertex_key.json
@@ -40,7 +40,7 @@ LiteLLM manages:
 [**Jump to LiteLLM Proxy (LLM Gateway) Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
 [**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-providers-docs)

-🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12 hour load tests, before being published.
+🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12 hour load tests, before being published. [More information about the release cycle here](https://docs.litellm.ai/docs/proxy/release_cycle)

 Support for more providers. Missing a provider or LLM Platform, raise a [feature request](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+).
@@ -18,7 +18,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.3.0
+version: 0.4.1

 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
@@ -48,6 +48,23 @@ spec:
           {{- end }}
           - name: DISABLE_SCHEMA_UPDATE
             value: "false" # always run the migration from the Helm PreSync hook, override the value set
+          {{- with .Values.volumeMounts }}
+          volumeMounts:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+      {{- with .Values.volumes }}
+      volumes:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
       restartPolicy: OnFailure
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+  ttlSecondsAfterFinished: {{ .Values.migrationJob.ttlSecondsAfterFinished }}
   backoffLimit: {{ .Values.migrationJob.backoffLimit }}
 {{- end }}
@@ -187,6 +187,7 @@ migrationJob:
   backoffLimit: 4 # Backoff limit for Job restarts
   disableSchemaUpdate: false # Skip schema migrations for specific environments. When True, the job will exit with code 0.
   annotations: {}
+  ttlSecondsAfterFinished: 120

 # Additional environment variables to be added to the deployment
 envVars: {
docs/my-website/docs/pass_through/openai_passthrough.md (new file, +95 lines)

# OpenAI Passthrough

Pass-through endpoints for `/openai`

## Overview

| Feature | Supported | Notes |
|-------|-------|-------|
| Cost Tracking | ❌ | Not supported |
| Logging | ✅ | Works across all integrations |
| Streaming | ✅ | Fully supported |

### When to use this?

- For 90% of your use cases, you should use the [native LiteLLM OpenAI Integration](https://docs.litellm.ai/docs/providers/openai) (`/chat/completions`, `/embeddings`, `/completions`, `/images`, `/batches`, etc.)
- Use this passthrough to call less popular or newer OpenAI endpoints that LiteLLM doesn't fully support yet, such as `/assistants`, `/threads`, `/vector_stores`

Simply replace `https://api.openai.com` with `LITELLM_PROXY_BASE_URL/openai`

## Usage Examples

### Assistants API

#### Create OpenAI Client

Make sure you do the following:
- Point `base_url` to your `LITELLM_PROXY_BASE_URL/openai`
- Use your `LITELLM_API_KEY` as the `api_key`

```python
import openai

client = openai.OpenAI(
    base_url="http://0.0.0.0:4000/openai",  # <your-proxy-url>/openai
    api_key="sk-anything"  # <your-proxy-api-key>
)
```

#### Create an Assistant

```python
# Create an assistant
assistant = client.beta.assistants.create(
    name="Math Tutor",
    instructions="You are a math tutor. Help solve equations.",
    model="gpt-4o",
)
```

#### Create a Thread

```python
# Create a thread
thread = client.beta.threads.create()
```

#### Add a Message to the Thread

```python
# Add a message
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Solve 3x + 11 = 14",
)
```

#### Run the Assistant

```python
# Create a run to get the assistant's response
run = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=assistant.id,
)

# Check run status
run_status = client.beta.threads.runs.retrieve(
    thread_id=thread.id,
    run_id=run.id
)
```

#### Retrieve Messages

```python
# List messages after the run completes
messages = client.beta.threads.messages.list(
    thread_id=thread.id
)
```

#### Delete the Assistant

```python
# Delete the assistant when done
client.beta.assistants.delete(assistant.id)
```
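Not part of the added doc: since the passthrough also covers `/vector_stores`, a minimal sketch of reaching it with the same client could look like the following. The store name is a placeholder, and the method location varies by openai SDK version (older SDKs expose it under `client.beta`).

```python
# Reuses the `client` created above, whose base_url points at <your-proxy-url>/openai.
# In older openai SDK versions this call lives at client.beta.vector_stores.create(...).
vector_store = client.vector_stores.create(name="support-docs")  # "support-docs" is a hypothetical name
print(vector_store.id)
```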
@@ -377,6 +377,121 @@ print(f"\nResponse: {resp}")
 ```
+
+## Usage - 'thinking' / 'reasoning content'
+
+This is currently only supported for Anthropic's Claude 3.7 Sonnet + Deepseek R1.
+
+Works on v1.61.20+.
+
+Returns 2 new fields in `message` and `delta` object:
+- `reasoning_content` - string - The reasoning content of the response
+- `thinking_blocks` - list of objects (Anthropic only) - The thinking blocks of the response
+
+Each object has the following fields:
+- `type` - Literal["thinking"] - The type of thinking block
+- `thinking` - string - The thinking of the response. Also returned in `reasoning_content`
+- `signature_delta` - string - A base64 encoded string, returned by Anthropic.
+
+The `signature_delta` is required by Anthropic on subsequent calls, if 'thinking' content is passed in (only required to use `thinking` with tool calling). [Learn more](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#understanding-thinking-blocks)
+
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
+```python
+from litellm import completion
+
+# set env
+os.environ["AWS_ACCESS_KEY_ID"] = ""
+os.environ["AWS_SECRET_ACCESS_KEY"] = ""
+os.environ["AWS_REGION_NAME"] = ""
+
+resp = completion(
+    model="bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
+    messages=[{"role": "user", "content": "What is the capital of France?"}],
+    thinking={"type": "enabled", "budget_tokens": 1024},
+)
+
+print(resp)
+```
+
+</TabItem>
+<TabItem value="proxy" label="PROXY">
+
+1. Setup config.yaml
+
+```yaml
+model_list:
+  - model_name: bedrock-claude-3-7
+    litellm_params:
+      model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0
+      thinking: {"type": "enabled", "budget_tokens": 1024} # 👈 EITHER HERE OR ON REQUEST
+```
+
+2. Start proxy
+
+```bash
+litellm --config /path/to/config.yaml
+```
+
+3. Test it!
+
+```bash
+curl http://0.0.0.0:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer <YOUR-LITELLM-KEY>" \
+  -d '{
+    "model": "bedrock-claude-3-7",
+    "messages": [{"role": "user", "content": "What is the capital of France?"}],
+    "thinking": {"type": "enabled", "budget_tokens": 1024} # 👈 EITHER HERE OR ON CONFIG.YAML
+  }'
+```
+
+</TabItem>
+</Tabs>
+
+**Expected Response**
+
+Same as [Anthropic API response](../providers/anthropic#usage---thinking--reasoning_content).
+
+```python
+{
+    "id": "chatcmpl-c661dfd7-7530-49c9-b0cc-d5018ba4727d",
+    "created": 1740640366,
+    "model": "us.anthropic.claude-3-7-sonnet-20250219-v1:0",
+    "object": "chat.completion",
+    "system_fingerprint": null,
+    "choices": [
+        {
+            "finish_reason": "stop",
+            "index": 0,
+            "message": {
+                "content": "The capital of France is Paris. It's not only the capital city but also the largest city in France, serving as the country's major cultural, economic, and political center.",
+                "role": "assistant",
+                "tool_calls": null,
+                "function_call": null,
+                "reasoning_content": "The capital of France is Paris. This is a straightforward factual question.",
+                "thinking_blocks": [
+                    {
+                        "type": "thinking",
+                        "thinking": "The capital of France is Paris. This is a straightforward factual question.",
+                        "signature_delta": "EqoBCkgIARABGAIiQL2UoU0b1OHYi+yCHpBY7U6FQW8/FcoLewocJQPa2HnmLM+NECy50y44F/kD4SULFXi57buI9fAvyBwtyjlOiO0SDE3+r3spdg6PLOo9PBoMma2ku5OTAoR46j9VIjDRlvNmBvff7YW4WI9oU8XagaOBSxLPxElrhyuxppEn7m6bfT40dqBSTDrfiw4FYB4qEPETTI6TA6wtjGAAqmFqKTo="
+                    }
+                ]
+            }
+        }
+    ],
+    "usage": {
+        "completion_tokens": 64,
+        "prompt_tokens": 42,
+        "total_tokens": 106,
+        "completion_tokens_details": null,
+        "prompt_tokens_details": null
+    }
+}
+```
+
 ## Usage - Bedrock Guardrails

 Example of using [Bedrock Guardrails with LiteLLM](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-use-converse-api.html)
@@ -23,14 +23,16 @@ import os

 os.environ['CEREBRAS_API_KEY'] = ""
 response = completion(
-    model="cerebras/meta/llama3-70b-instruct",
+    model="cerebras/llama3-70b-instruct",
     messages=[
         {
             "role": "user",
-            "content": "What's the weather like in Boston today in Fahrenheit?",
+            "content": "What's the weather like in Boston today in Fahrenheit? (Write in JSON)",
         }
     ],
     max_tokens=10,
+
+    # The prompt should include JSON if 'json_object' is selected; otherwise, you will get error code 400.
     response_format={ "type": "json_object" },
     seed=123,
     stop=["\n\n"],
@@ -50,16 +52,18 @@ import os

 os.environ['CEREBRAS_API_KEY'] = ""
 response = completion(
-    model="cerebras/meta/llama3-70b-instruct",
+    model="cerebras/llama3-70b-instruct",
     messages=[
         {
             "role": "user",
-            "content": "What's the weather like in Boston today in Fahrenheit?",
+            "content": "What's the weather like in Boston today in Fahrenheit? (Write in JSON)",
         }
     ],
     stream=True,
     max_tokens=10,
-    response_format={ "type": "json_object" },
+
+    # The prompt should include JSON if 'json_object' is selected; otherwise, you will get error code 400.
+    response_format={ "type": "json_object" },
     seed=123,
     stop=["\n\n"],
     temperature=0.2,
@@ -1,3 +1,6 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
 # Infinity

 | Property | Details |
@@ -12,6 +15,9 @@

 ```python
 from litellm import rerank
+import os
+
+os.environ["INFINITY_API_BASE"] = "http://localhost:8080"

 response = rerank(
     model="infinity/rerank",
@@ -65,3 +71,114 @@ curl http://0.0.0.0:4000/rerank \
 ```
+
+## Supported Cohere Rerank API Params
+
+| Param | Type | Description |
+|-------|-------|-------|
+| `query` | `str` | The query to rerank the documents against |
+| `documents` | `list[str]` | The documents to rerank |
+| `top_n` | `int` | The number of documents to return |
+| `return_documents` | `bool` | Whether to return the documents in the response |
+
+### Usage - Return Documents
+
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
+```python
+response = rerank(
+    model="infinity/rerank",
+    query="What is the capital of France?",
+    documents=["Paris", "London", "Berlin", "Madrid"],
+    return_documents=True,
+)
+```
+
+</TabItem>
+
+<TabItem value="proxy" label="PROXY">
+
+```bash
+curl http://0.0.0.0:4000/rerank \
+  -H "Authorization: Bearer sk-1234" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "custom-infinity-rerank",
+    "query": "What is the capital of France?",
+    "documents": [
+      "Paris",
+      "London",
+      "Berlin",
+      "Madrid"
+    ],
+    "return_documents": True,
+  }'
+```

+</TabItem>
+</Tabs>
+
+## Pass Provider-specific Params
+
+Any unmapped params will be passed to the provider as-is.
+
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
+```python
+from litellm import rerank
+import os
+
+os.environ["INFINITY_API_BASE"] = "http://localhost:8080"
+
+response = rerank(
+    model="infinity/rerank",
+    query="What is the capital of France?",
+    documents=["Paris", "London", "Berlin", "Madrid"],
+    raw_scores=True, # 👈 PROVIDER-SPECIFIC PARAM
+)
+```
+</TabItem>
+
+<TabItem value="proxy" label="PROXY">
+
+1. Setup config.yaml
+
+```yaml
+model_list:
+  - model_name: custom-infinity-rerank
+    litellm_params:
+      model: infinity/rerank
+      api_base: https://localhost:8080
+      raw_scores: True # 👈 EITHER SET PROVIDER-SPECIFIC PARAMS HERE OR IN REQUEST BODY
+```
+
+2. Start litellm
+
+```bash
+litellm --config /path/to/config.yaml
+
+# RUNNING on http://0.0.0.0:4000
+```
+
+3. Test it!
+
+```bash
+curl http://0.0.0.0:4000/rerank \
+  -H "Authorization: Bearer sk-1234" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "custom-infinity-rerank",
+    "query": "What is the capital of the United States?",
+    "documents": [
+      "Carson City is the capital city of the American state of Nevada.",
+      "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.",
+      "Washington, D.C. is the capital of the United States.",
+      "Capital punishment has existed in the United States since before it was a country."
+    ],
+    "raw_scores": True # 👈 PROVIDER-SPECIFIC PARAM
+  }'
+```
+</TabItem>
+
+</Tabs>
@@ -3,13 +3,15 @@ import TabItem from '@theme/TabItem';

 # LiteLLM Proxy (LLM Gateway)

-:::tip
-
-[LiteLLM Providers a **self hosted** proxy server (AI Gateway)](../simple_proxy) to call all the LLMs in the OpenAI format
-
-:::
-
-**[LiteLLM Proxy](../simple_proxy) is OpenAI compatible**, you just need the `litellm_proxy/` prefix before the model
+| Property | Details |
+|-------|-------|
+| Description | LiteLLM Proxy is an OpenAI-compatible gateway that allows you to interact with multiple LLM providers through a unified API. Simply use the `litellm_proxy/` prefix before the model name to route your requests through the proxy. |
+| Provider Route on LiteLLM | `litellm_proxy/` (add this prefix to the model name, to route any requests to litellm_proxy - e.g. `litellm_proxy/your-model-name`) |
+| Setup LiteLLM Gateway | [LiteLLM Gateway ↗](../simple_proxy) |
+| Supported Endpoints | `/chat/completions`, `/completions`, `/embeddings`, `/audio/speech`, `/audio/transcriptions`, `/images`, `/rerank` |
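Not part of the diff above: a minimal sketch of what routing a chat request through the proxy with the `litellm_proxy/` prefix looks like, assuming the proxy runs at `http://0.0.0.0:4000` with a virtual key `sk-1234` and a model name that is a placeholder.

```python
import litellm

# Any model configured on the proxy can be addressed with the litellm_proxy/ prefix;
# api_base / api_key point at the running LiteLLM Proxy, not the upstream provider.
response = litellm.completion(
    model="litellm_proxy/your-model-name",  # hypothetical model name configured on the proxy
    messages=[{"role": "user", "content": "Hello from the litellm_proxy provider"}],
    api_base="http://0.0.0.0:4000",
    api_key="sk-1234",
)
print(response.choices[0].message.content)
```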
 ## Required Variables

@@ -83,7 +85,76 @@ for chunk in response:
     print(chunk)
 ```
+
+## Embeddings
+
+```python
+import litellm
+
+response = litellm.embedding(
+    model="litellm_proxy/your-embedding-model",
+    input="Hello world",
+    api_base="your-litellm-proxy-url",
+    api_key="your-litellm-proxy-api-key"
+)
+```
+
+## Image Generation
+
+```python
+import litellm
+
+response = litellm.image_generation(
+    model="litellm_proxy/dall-e-3",
+    prompt="A beautiful sunset over mountains",
+    api_base="your-litellm-proxy-url",
+    api_key="your-litellm-proxy-api-key"
+)
+```
+
+## Audio Transcription
+
+```python
+import litellm
+
+response = litellm.transcription(
+    model="litellm_proxy/whisper-1",
+    file="your-audio-file",
+    api_base="your-litellm-proxy-url",
+    api_key="your-litellm-proxy-api-key"
+)
+```
+
+## Text to Speech
+
+```python
+import litellm
+
+response = litellm.speech(
+    model="litellm_proxy/tts-1",
+    input="Hello world",
+    api_base="your-litellm-proxy-url",
+    api_key="your-litellm-proxy-api-key"
+)
+```
+
+## Rerank
+
+```python
+import litellm
+
+response = litellm.rerank(
+    model="litellm_proxy/rerank-english-v2.0",
+    query="What is machine learning?",
+    documents=[
+        "Machine learning is a field of study in artificial intelligence",
+        "Biology is the study of living organisms"
+    ],
+    api_base="your-litellm-proxy-url",
+    api_key="your-litellm-proxy-api-key"
+)
+```
 ## **Usage with Langchain, LLamaindex, OpenAI Js, Anthropic SDK, Instructor**

 #### [Follow this doc to see how to use litellm proxy with langchain, llamaindex, anthropic etc](../proxy/user_keys)
@@ -2,11 +2,11 @@ import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';

 # Sambanova
-https://community.sambanova.ai/t/create-chat-completion-api/
+https://cloud.sambanova.ai/

 :::tip

-**We support ALL Sambanova models, just set `model=sambanova/<any-model-on-sambanova>` as a prefix when sending litellm requests. For the complete supported model list, visit https://sambanova.ai/technology/models **
+**We support ALL Sambanova models, just set `model=sambanova/<any-model-on-sambanova>` as a prefix when sending litellm requests. For the complete supported model list, visit https://docs.sambanova.ai/cloud/docs/get-started/supported-models **

 :::

@@ -27,12 +27,11 @@ response = completion(
     messages=[
         {
             "role": "user",
-            "content": "What do you know about sambanova.ai",
+            "content": "What do you know about sambanova.ai. Give your response in json format",
         }
     ],
     max_tokens=10,
     response_format={ "type": "json_object" },
-    seed=123,
     stop=["\n\n"],
     temperature=0.2,
     top_p=0.9,
@@ -54,13 +53,12 @@ response = completion(
     messages=[
         {
             "role": "user",
-            "content": "What do you know about sambanova.ai",
+            "content": "What do you know about sambanova.ai. Give your response in json format",
         }
     ],
     stream=True,
     max_tokens=10,
     response_format={ "type": "json_object" },
-    seed=123,
     stop=["\n\n"],
     temperature=0.2,
     top_p=0.9,
@@ -852,6 +852,7 @@ litellm.vertex_location = "us-central1 # Your Location
 | claude-3-5-sonnet@20240620 | `completion('vertex_ai/claude-3-5-sonnet@20240620', messages)` |
 | claude-3-sonnet@20240229 | `completion('vertex_ai/claude-3-sonnet@20240229', messages)` |
 | claude-3-haiku@20240307 | `completion('vertex_ai/claude-3-haiku@20240307', messages)` |
+| claude-3-7-sonnet@20250219 | `completion('vertex_ai/claude-3-7-sonnet@20250219', messages)` |

 ### Usage

@@ -926,6 +927,119 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
 </Tabs>
+
+### Usage - `thinking` / `reasoning_content`
+
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
+```python
+from litellm import completion
+
+resp = completion(
+    model="vertex_ai/claude-3-7-sonnet-20250219",
+    messages=[{"role": "user", "content": "What is the capital of France?"}],
+    thinking={"type": "enabled", "budget_tokens": 1024},
+)
+```
+
+</TabItem>
+
+<TabItem value="proxy" label="PROXY">
+
+1. Setup config.yaml
+
+```yaml
+- model_name: claude-3-7-sonnet-20250219
+  litellm_params:
+    model: vertex_ai/claude-3-7-sonnet-20250219
+    vertex_ai_project: "my-test-project"
+    vertex_ai_location: "us-west-1"
+```
+
+2. Start proxy
+
+```bash
+litellm --config /path/to/config.yaml
+```
+
+3. Test it!
+
+```bash
+curl http://0.0.0.0:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer <YOUR-LITELLM-KEY>" \
+  -d '{
+    "model": "claude-3-7-sonnet-20250219",
+    "messages": [{"role": "user", "content": "What is the capital of France?"}],
+    "thinking": {"type": "enabled", "budget_tokens": 1024}
+  }'
+```
+
+</TabItem>
+</Tabs>
+
+**Expected Response**
+
+```python
+ModelResponse(
+    id='chatcmpl-c542d76d-f675-4e87-8e5f-05855f5d0f5e',
+    created=1740470510,
+    model='claude-3-7-sonnet-20250219',
+    object='chat.completion',
+    system_fingerprint=None,
+    choices=[
+        Choices(
+            finish_reason='stop',
+            index=0,
+            message=Message(
+                content="The capital of France is Paris.",
+                role='assistant',
+                tool_calls=None,
+                function_call=None,
+                provider_specific_fields={
+                    'citations': None,
+                    'thinking_blocks': [
+                        {
+                            'type': 'thinking',
+                            'thinking': 'The capital of France is Paris. This is a very straightforward factual question.',
+                            'signature': 'EuYBCkQYAiJAy6...'
+                        }
+                    ]
+                }
+            ),
+            thinking_blocks=[
+                {
+                    'type': 'thinking',
+                    'thinking': 'The capital of France is Paris. This is a very straightforward factual question.',
+                    'signature': 'EuYBCkQYAiJAy6AGB...'
+                }
+            ],
+            reasoning_content='The capital of France is Paris. This is a very straightforward factual question.'
+        )
+    ],
+    usage=Usage(
+        completion_tokens=68,
+        prompt_tokens=42,
+        total_tokens=110,
+        completion_tokens_details=None,
+        prompt_tokens_details=PromptTokensDetailsWrapper(
+            audio_tokens=None,
+            cached_tokens=0,
+            text_tokens=None,
+            image_tokens=None
+        ),
+        cache_creation_input_tokens=0,
+        cache_read_input_tokens=0
+    )
+)
+```
+
 ## Llama 3 API

 | Model Name | Function Call |
@@ -157,6 +157,98 @@ curl -L -X POST 'http://0.0.0.0:4000/embeddings' \
 </TabItem>
 </Tabs>
+
+## Send Video URL to VLLM
+
+Example Implementation from VLLM [here](https://github.com/vllm-project/vllm/pull/10020)
+
+There are two ways to send a video url to VLLM:
+
+1. Pass the video url directly
+
+```
+{"type": "video_url", "video_url": {"url": video_url}},
+```
+
+2. Pass the video data as base64
+
+```
+{"type": "video_url", "video_url": {"url": f"data:video/mp4;base64,{video_data_base64}"}}
+```
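Not part of the diff: for option 2, a minimal sketch of building the base64 payload with the SDK. The file name, model, and endpoint below are placeholders; the message shape mirrors the URL example that follows.

```python
import base64

import litellm

# Read a local file and build the data-URL form shown in option 2 above.
# "sample.mp4" and the api_base are hypothetical values, not part of the original doc.
with open("sample.mp4", "rb") as f:
    video_data_base64 = base64.b64encode(f.read()).decode("utf-8")

response = litellm.completion(
    model="hosted_vllm/qwen",               # the vLLM model name behind the hosted_vllm/ prefix
    api_base="https://hosted-vllm-api.co",  # the OpenAI-compatible vLLM endpoint
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Summarize the following video"},
                {"type": "video_url", "video_url": {"url": f"data:video/mp4;base64,{video_data_base64}"}},
            ],
        }
    ],
)
print(response)
```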
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
+```python
+from litellm import completion
+
+response = completion(
+    model="hosted_vllm/qwen",  # pass the vllm model name
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "Summarize the following video"
+                },
+                {
+                    "type": "video_url",
+                    "video_url": {
+                        "url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
+                    }
+                }
+            ]
+        }
+    ],
+    api_base="https://hosted-vllm-api.co")
+
+print(response)
+```
+
+</TabItem>
+<TabItem value="proxy" label="PROXY">
+
+1. Setup config.yaml
+
+```yaml
+model_list:
+  - model_name: my-model
+    litellm_params:
+      model: hosted_vllm/qwen            # add hosted_vllm/ prefix to route as OpenAI provider
+      api_base: https://hosted-vllm-api.co   # add api base for OpenAI compatible provider
+```
+
+2. Start the proxy
+
+```bash
+$ litellm --config /path/to/config.yaml
+
+# RUNNING on http://0.0.0.0:4000
+```
+
+3. Test it!
+
+```bash
+curl -X POST http://0.0.0.0:4000/chat/completions \
+  -H "Authorization: Bearer sk-1234" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "my-model",
+    "messages": [
+      {"role": "user", "content":
+        [
+          {"type": "text", "text": "Summarize the following video"},
+          {"type": "video_url", "video_url": {"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"}}
+        ]
+      }
+    ]
+  }'
+```
+
+</TabItem>
+</Tabs>
+
 ## (Deprecated) for `vllm pip package`
 ### Using - `litellm.completion`
@@ -36,7 +36,7 @@ import TabItem from '@theme/TabItem';
 - Virtual Key Rate Limit
 - User Rate Limit
 - Team Limit
-- The `_PROXY_track_cost_callback` updates spend / usage in the LiteLLM database. [Here is everything tracked in the DB per request](https://github.com/BerriAI/litellm/blob/ba41a72f92a9abf1d659a87ec880e8e319f87481/schema.prisma#L172)
+- The `_ProxyDBLogger` updates spend / usage in the LiteLLM database. [Here is everything tracked in the DB per request](https://github.com/BerriAI/litellm/blob/ba41a72f92a9abf1d659a87ec880e8e319f87481/schema.prisma#L172)

 ## Frequently Asked Questions
docs/my-website/docs/proxy/release_cycle.md (new file, +12 lines)

# Release Cycle

Litellm Proxy has the following release cycle:

- `v1.x.x-nightly`: These are releases which pass ci/cd.
- `v1.x.x.rc`: These are releases which pass ci/cd + [manual review](https://github.com/BerriAI/litellm/discussions/8495#discussioncomment-12180711).
- `v1.x.x`: These are releases which pass ci/cd + manual review + 3 days of production testing.

In production, we recommend using the latest `v1.x.x` release.

Follow our release notes [here](https://github.com/BerriAI/litellm/releases).
docs/my-website/docs/reasoning_content.md (new file, +357 lines)

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# 'Thinking' / 'Reasoning Content'

Supported Providers:
- Deepseek (`deepseek/`)
- Anthropic API (`anthropic/`)
- Bedrock (Anthropic + Deepseek) (`bedrock/`)
- Vertex AI (Anthropic) (`vertexai/`)

```python
"message": {
    ...
    "reasoning_content": "The capital of France is Paris.",
    "thinking_blocks": [
        {
            "type": "thinking",
            "thinking": "The capital of France is Paris.",
            "signature_delta": "EqoBCkgIARABGAIiQL2UoU0b1OHYi+..."
        }
    ]
}
```

## Quick Start

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import completion
import os

os.environ["ANTHROPIC_API_KEY"] = ""

response = completion(
    model="anthropic/claude-3-7-sonnet-20250219",
    messages=[
        {"role": "user", "content": "What is the capital of France?"},
    ],
    thinking={"type": "enabled", "budget_tokens": 1024} # 👈 REQUIRED FOR ANTHROPIC models (on `anthropic/`, `bedrock/`, `vertexai/`)
)
print(response.choices[0].message.content)
```

</TabItem>
<TabItem value="proxy" label="PROXY">

```bash
curl http://0.0.0.0:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $LITELLM_KEY" \
  -d '{
    "model": "anthropic/claude-3-7-sonnet-20250219",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ],
    "thinking": {"type": "enabled", "budget_tokens": 1024}
  }'
```
</TabItem>
</Tabs>

**Expected Response**

```bash
{
    "id": "3b66124d79a708e10c603496b363574c",
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "message": {
                "content": " won the FIFA World Cup in 2022.",
                "role": "assistant",
                "tool_calls": null,
                "function_call": null
            }
        }
    ],
    "created": 1723323084,
    "model": "deepseek/deepseek-chat",
    "object": "chat.completion",
    "system_fingerprint": "fp_7e0991cad4",
    "usage": {
        "completion_tokens": 12,
        "prompt_tokens": 16,
        "total_tokens": 28,
    },
    "service_tier": null
}
```

## Tool Calling with `thinking`

Here's how to use `thinking` blocks by Anthropic with tool calling.

<Tabs>
<TabItem value="sdk" label="SDK">

```python
litellm._turn_on_debug()
litellm.modify_params = True
model = "anthropic/claude-3-7-sonnet-20250219"  # works across Anthropic, Bedrock, Vertex AI
# Step 1: send the conversation and available functions to the model
messages = [
    {
        "role": "user",
        "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses",
    }
]
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state",
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                    },
                },
                "required": ["location"],
            },
        },
    }
]
response = litellm.completion(
    model=model,
    messages=messages,
    tools=tools,
    tool_choice="auto",  # auto is default, but we'll be explicit
    thinking={"type": "enabled", "budget_tokens": 1024},
)
print("Response\n", response)
response_message = response.choices[0].message
tool_calls = response_message.tool_calls

print("Expecting there to be 3 tool calls")
assert (
    len(tool_calls) > 0
)  # this has to call the function for SF, Tokyo and paris

# Step 2: check if the model wanted to call a function
print(f"tool_calls: {tool_calls}")
if tool_calls:
    # Step 3: call the function
    # Note: the JSON response may not always be valid; be sure to handle errors
    available_functions = {
        "get_current_weather": get_current_weather,
    }  # only one function in this example, but you can have multiple
    messages.append(
        response_message
    )  # extend conversation with assistant's reply
    print("Response message\n", response_message)
    # Step 4: send the info for each function call and function response to the model
    for tool_call in tool_calls:
        function_name = tool_call.function.name
        if function_name not in available_functions:
            # the model called a function that does not exist in available_functions - don't try calling anything
            return
        function_to_call = available_functions[function_name]
        function_args = json.loads(tool_call.function.arguments)
        function_response = function_to_call(
            location=function_args.get("location"),
            unit=function_args.get("unit"),
        )
        messages.append(
            {
                "tool_call_id": tool_call.id,
                "role": "tool",
                "name": function_name,
                "content": function_response,
            }
        )  # extend conversation with function response
    print(f"messages: {messages}")
    second_response = litellm.completion(
        model=model,
        messages=messages,
        seed=22,
        # tools=tools,
        drop_params=True,
        thinking={"type": "enabled", "budget_tokens": 1024},
    )  # get a new response from the model where it can see the function response
    print("second response\n", second_response)
```

</TabItem>
<TabItem value="proxy" label="PROXY">

1. Setup config.yaml

```yaml
model_list:
  - model_name: claude-3-7-sonnet-thinking
    litellm_params:
      model: anthropic/claude-3-7-sonnet-20250219
      api_key: os.environ/ANTHROPIC_API_KEY
      thinking: {
        "type": "enabled",
        "budget_tokens": 1024
      }
```

2. Run proxy

```bash
litellm --config config.yaml

# RUNNING on http://0.0.0.0:4000
```

3. Make 1st call

```bash
curl http://0.0.0.0:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $LITELLM_KEY" \
  -d '{
    "model": "claude-3-7-sonnet-thinking",
    "messages": [
      {"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"},
    ],
    "tools": [
      {
        "type": "function",
        "function": {
          "name": "get_current_weather",
          "description": "Get the current weather in a given location",
          "parameters": {
            "type": "object",
            "properties": {
              "location": {
                "type": "string",
                "description": "The city and state",
              },
              "unit": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
              },
            },
            "required": ["location"],
          },
        },
      }
    ],
    "tool_choice": "auto"
  }'
```

4. Make 2nd call with tool call results

```bash
curl http://0.0.0.0:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $LITELLM_KEY" \
  -d '{
    "model": "claude-3-7-sonnet-thinking",
    "messages": [
      {
        "role": "user",
        "content": "What\'s the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"
      },
      {
        "role": "assistant",
        "content": "I\'ll check the current weather for these three cities for you:",
        "tool_calls": [
          {
            "index": 2,
            "function": {
              "arguments": "{\"location\": \"San Francisco\"}",
              "name": "get_current_weather"
            },
            "id": "tooluse_mnqzmtWYRjCxUInuAdK7-w",
            "type": "function"
          }
        ],
        "function_call": null,
        "reasoning_content": "The user is asking for the current weather in three different locations: San Francisco, Tokyo, and Paris. I have access to the `get_current_weather` function that can provide this information.\n\nThe function requires a `location` parameter, and has an optional `unit` parameter. The user hasn't specified which unit they prefer (celsius or fahrenheit), so I'll use the default provided by the function.\n\nI need to make three separate function calls, one for each location:\n1. San Francisco\n2. Tokyo\n3. Paris\n\nThen I'll compile the results into a response with three distinct weather reports as requested by the user.",
        "thinking_blocks": [
          {
            "type": "thinking",
            "thinking": "The user is asking for the current weather in three different locations: San Francisco, Tokyo, and Paris. I have access to the `get_current_weather` function that can provide this information.\n\nThe function requires a `location` parameter, and has an optional `unit` parameter. The user hasn't specified which unit they prefer (celsius or fahrenheit), so I'll use the default provided by the function.\n\nI need to make three separate function calls, one for each location:\n1. San Francisco\n2. Tokyo\n3. Paris\n\nThen I'll compile the results into a response with three distinct weather reports as requested by the user.",
            "signature_delta": "EqoBCkgIARABGAIiQCkBXENoyB+HstUOs/iGjG+bvDbIQRrxPsPpOSt5yDxX6iulZ/4K/w9Rt4J5Nb2+3XUYsyOH+CpZMfADYvItFR4SDPb7CmzoGKoolCMAJRoM62p1ZRASZhrD3swqIjAVY7vOAFWKZyPEJglfX/60+bJphN9W1wXR6rWrqn3MwUbQ5Mb/pnpeb10HMploRgUqEGKOd6fRKTkUoNDuAnPb55c="
          }
        ],
        "provider_specific_fields": {
          "reasoningContentBlocks": [
            {
              "reasoningText": {
                "signature": "EqoBCkgIARABGAIiQCkBXENoyB+HstUOs/iGjG+bvDbIQRrxPsPpOSt5yDxX6iulZ/4K/w9Rt4J5Nb2+3XUYsyOH+CpZMfADYvItFR4SDPb7CmzoGKoolCMAJRoM62p1ZRASZhrD3swqIjAVY7vOAFWKZyPEJglfX/60+bJphN9W1wXR6rWrqn3MwUbQ5Mb/pnpeb10HMploRgUqEGKOd6fRKTkUoNDuAnPb55c=",
                "text": "The user is asking for the current weather in three different locations: San Francisco, Tokyo, and Paris. I have access to the `get_current_weather` function that can provide this information.\n\nThe function requires a `location` parameter, and has an optional `unit` parameter. The user hasn't specified which unit they prefer (celsius or fahrenheit), so I'll use the default provided by the function.\n\nI need to make three separate function calls, one for each location:\n1. San Francisco\n2. Tokyo\n3. Paris\n\nThen I'll compile the results into a response with three distinct weather reports as requested by the user."
              }
            }
          ]
        }
      },
      {
        "tool_call_id": "tooluse_mnqzmtWYRjCxUInuAdK7-w",
        "role": "tool",
        "name": "get_current_weather",
        "content": "{\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": \"fahrenheit\"}"
      }
    ]
  }'
```

</TabItem>
</Tabs>

## Switching between Anthropic + Deepseek models

Set `drop_params=True` to drop the 'thinking' blocks when swapping from Anthropic to Deepseek models. Suggest improvements to this approach [here](https://github.com/BerriAI/litellm/discussions/8927).

```python
litellm.drop_params = True # 👈 EITHER GLOBALLY or per request

# or per request
## Anthropic
response = litellm.completion(
    model="anthropic/claude-3-7-sonnet-20250219",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    thinking={"type": "enabled", "budget_tokens": 1024},
    drop_params=True,
)

## Deepseek
response = litellm.completion(
    model="deepseek/deepseek-chat",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    thinking={"type": "enabled", "budget_tokens": 1024},
    drop_params=True,
)
```

## Spec

These fields can be accessed via `response.choices[0].message.reasoning_content` and `response.choices[0].message.thinking_blocks`.

- `reasoning_content` - str: The reasoning content from the model. Returned across all providers.
- `thinking_blocks` - Optional[List[Dict[str, str]]]: A list of thinking blocks from the model. Only returned for Anthropic models.
  - `type` - str: The type of thinking block.
  - `thinking` - str: The thinking from the model.
  - `signature_delta` - str: The signature delta from the model.
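Not part of the new doc itself: a small illustration of reading the fields described in the spec above; the model name is a placeholder and the key is assumed to be set in the environment.

```python
from litellm import completion

# Placeholder model; any provider from the "Supported Providers" list above follows the same spec.
response = completion(
    model="anthropic/claude-3-7-sonnet-20250219",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    thinking={"type": "enabled", "budget_tokens": 1024},
)

message = response.choices[0].message
print(message.reasoning_content)             # plain-text reasoning, returned across providers
for block in message.thinking_blocks or []:  # Anthropic-only; may be absent elsewhere
    print(block["type"], block["thinking"])
```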
@@ -18,13 +18,6 @@ hide_table_of_contents: false
 `alerting`, `prometheus`, `secret management`, `management endpoints`, `ui`, `prompt management`, `finetuning`, `batch`

-:::note
-
-v1.57.8-stable, is currently being tested. It will be released on 2025-01-12.
-
-:::
-
 ## New / Updated Models

 1. Mistral large pricing - https://github.com/BerriAI/litellm/pull/7452
109
docs/my-website/release_notes/v1.61.20-stable/index.md
Normal file
109
docs/my-website/release_notes/v1.61.20-stable/index.md
Normal file
|
@ -0,0 +1,109 @@
|
||||||
|
---
|
||||||
|
title: v1.61.20-stable
|
||||||
|
slug: v1.61.20-stable
|
||||||
|
date: 2025-03-01T10:00:00
|
||||||
|
authors:
|
||||||
|
- name: Krrish Dholakia
|
||||||
|
title: CEO, LiteLLM
|
||||||
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
||||||
|
- name: Ishaan Jaffer
|
||||||
|
title: CTO, LiteLLM
|
||||||
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGiM7ZrUwqu_Q/profile-displayphoto-shrink_800_800/profile-displayphoto-shrink_800_800/0/1675971026692?e=1741824000&v=beta&t=eQnRdXPJo4eiINWTZARoYTfqh064pgZ-E21pQTSy8jc
|
||||||
|
tags: [llm translation, rerank, ui, thinking, reasoning_content, claude-3-7-sonnet]
|
||||||
|
hide_table_of_contents: false
|
||||||
|
---
|
||||||
|
|
||||||
|
import Image from '@theme/IdealImage';
|
||||||
|
|
||||||
|
# v1.61.20-stable
|
||||||
|
|
||||||
|
|
||||||
|
:::info
|
||||||
|
|
||||||
|
`v1.61.20-stable` will be live on 2025-02-04.
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
These are the changes since `v1.61.13-stable`.
|
||||||
|
|
||||||
|
This release is primarily focused on:
|
||||||
|
- LLM Translation improvements (claude-3-7-sonnet + 'thinking'/'reasoning_content' support)
|
||||||
|
- UI improvements (add model flow, user management, etc)
|
||||||
|
|
||||||
|
## Demo Instance
|
||||||
|
|
||||||
|
Here's a Demo Instance to test changes:
|
||||||
|
- Instance: https://demo.litellm.ai/
|
||||||
|
- Login Credentials:
|
||||||
|
- Username: admin
|
||||||
|
- Password: sk-1234
|
||||||
|
|
||||||
|
## New Models / Updated Models

1. Anthropic claude-3-7-sonnet support + cost tracking (Anthropic API + Bedrock + Vertex AI + OpenRouter) (see the usage sketch after this list)
    1. Anthropic API [Start here](https://docs.litellm.ai/docs/providers/anthropic#usage---thinking--reasoning_content)
    2. Bedrock API [Start here](https://docs.litellm.ai/docs/providers/bedrock#usage---thinking--reasoning-content)
    3. Vertex AI API [See here](../../docs/providers/vertex#usage---thinking--reasoning_content)
    4. OpenRouter [See here](https://github.com/BerriAI/litellm/blob/ba5bdce50a0b9bc822de58c03940354f19a733ed/model_prices_and_context_window.json#L5626)
2. gpt-4.5-preview support + cost tracking [See here](https://github.com/BerriAI/litellm/blob/ba5bdce50a0b9bc822de58c03940354f19a733ed/model_prices_and_context_window.json#L79)
3. Azure AI - Phi-4 cost tracking [See here](https://github.com/BerriAI/litellm/blob/ba5bdce50a0b9bc822de58c03940354f19a733ed/model_prices_and_context_window.json#L1773)
4. Claude-3.5-sonnet - vision support updated on Anthropic API [See here](https://github.com/BerriAI/litellm/blob/ba5bdce50a0b9bc822de58c03940354f19a733ed/model_prices_and_context_window.json#L2888)
5. Bedrock llama vision support [See here](https://github.com/BerriAI/litellm/blob/ba5bdce50a0b9bc822de58c03940354f19a733ed/model_prices_and_context_window.json#L7714)
6. Cerebras llama3.3-70b pricing [See here](https://github.com/BerriAI/litellm/blob/ba5bdce50a0b9bc822de58c03940354f19a733ed/model_prices_and_context_window.json#L2697)
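As a rough illustration of item 1, the sketch below calls claude-3-7-sonnet through two of the listed providers and reads the tracked cost; the model ids and the `thinking` argument shape are assumptions rather than excerpts from this release note.

```python
# Sketch only: claude-3-7-sonnet via two providers + cost tracking.
# Model ids and the `thinking` shape are assumptions.
import litellm

for model in [
    "anthropic/claude-3-7-sonnet-20250219",                  # Anthropic API (assumed id)
    "bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",  # Bedrock (assumed id)
]:
    response = litellm.completion(
        model=model,
        messages=[{"role": "user", "content": "Summarize this release in one line."}],
        thinking={"type": "enabled", "budget_tokens": 1024},  # assumed shape
    )
    # cost tracking relies on the pricing entries added for these models
    print(model, litellm.completion_cost(completion_response=response))
```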
## LLM Translation

1. Infinity Rerank - support returning documents when return_documents=True [Start here](../../docs/providers/infinity#usage---returning-documents)
2. Amazon Deepseek - `<think>` param extraction into ‘reasoning_content’ [Start here](https://docs.litellm.ai/docs/providers/bedrock#bedrock-imported-models-deepseek-deepseek-r1)
3. Amazon Titan Embeddings - filter out ‘aws_’ params from request body [Start here](https://docs.litellm.ai/docs/providers/bedrock#bedrock-embedding)
4. Anthropic ‘thinking’ + ‘reasoning_content’ translation support (Anthropic API, Bedrock, Vertex AI) [Start here](https://docs.litellm.ai/docs/reasoning_content)
5. VLLM - support ‘video_url’ [Start here](../../docs/providers/vllm#send-video-url-to-vllm)
6. Call proxy via litellm SDK: Support `litellm_proxy/` for embedding, image_generation, transcription, speech, rerank [Start here](https://docs.litellm.ai/docs/providers/litellm_proxy) (see the sketch after this list)
7. OpenAI Pass-through - allow using Assistants GET, DELETE on /openai pass-through routes [Start here](https://docs.litellm.ai/docs/pass_through/openai_passthrough)
8. Message Translation - fix OpenAI message translation when an assistant message is missing the role field (OpenAI allows this)
9. O1/O3 - support ‘drop_params’ for the parallel_tool_calls param on o3-mini and o1 (not currently supported by these models) [See here](https://docs.litellm.ai/docs/completion/drop_params)
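Item 6 can be exercised roughly as follows; the proxy URL, virtual key, and model alias below are placeholders, and the exact aliases depend on your proxy config.

```python
# Sketch only: routing an SDK embedding call through a running LiteLLM proxy
# using the `litellm_proxy/` prefix. URL, key, and model alias are placeholders.
import litellm

response = litellm.embedding(
    model="litellm_proxy/text-embedding-ada-002",  # alias configured on the proxy
    input=["hello from the sdk"],
    api_base="http://localhost:4000",  # your proxy URL
    api_key="sk-1234",                 # a virtual key issued by the proxy
)
print(len(response.data[0]["embedding"]))
```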
## Spend Tracking Improvements

1. Cost tracking for rerank via Bedrock [See PR](https://github.com/BerriAI/litellm/commit/b682dc4ec8fd07acf2f4c981d2721e36ae2a49c5)
2. Anthropic pass-through - fix race condition causing cost to not be tracked [See PR](https://github.com/BerriAI/litellm/pull/8874)
3. Anthropic pass-through - ensure accurate token counting [See PR](https://github.com/BerriAI/litellm/pull/8880)

## Management Endpoints / UI

1. Models Page - Allow sorting models by ‘created at’
2. Models Page - Edit Model Flow Improvements
3. Models Page - Fix adding Azure, Azure AI Studio models on UI
4. Internal Users Page - Allow bulk adding internal users on UI
5. Internal Users Page - Allow sorting users by ‘created at’
6. Virtual Keys Page - Allow searching for UserIDs on the dropdown when assigning a user to a team [See PR](https://github.com/BerriAI/litellm/pull/8844)
7. Virtual Keys Page - Allow creating a user when assigning keys to users [See PR](https://github.com/BerriAI/litellm/pull/8844)
8. Model Hub Page - Fix text overflow issue [See PR](https://github.com/BerriAI/litellm/pull/8749)
9. Admin Settings Page - Allow adding MSFT SSO on UI
10. Backend - Don't allow creating duplicate internal users in the DB

## Helm

1. Support ttlSecondsAfterFinished on the migration job - [See PR](https://github.com/BerriAI/litellm/pull/8593)
2. Enhance migrations job with additional configurable properties - [See PR](https://github.com/BerriAI/litellm/pull/8636)

## Logging / Guardrail Integrations

1. Arize Phoenix support
2. ‘No-log’ - fix ‘no-log’ param support on embedding calls

## Performance / Loadbalancing / Reliability improvements

1. Single Deployment Cooldown logic - use allowed_fails or allowed_fail_policy if set [Start here](https://docs.litellm.ai/docs/routing#advanced-custom-retries-cooldowns-based-on-error-type)

## General Proxy Improvements

1. Hypercorn - fix reading / parsing the request body
2. Windows - fix running the proxy on Windows
3. DD-Trace - fix dd-trace enablement on the proxy

## Complete Git Diff

View the complete git diff [here](https://github.com/BerriAI/litellm/compare/v1.61.13-stable...v1.61.20-stable).
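Since the DD-Trace fix is toggled purely by deployment configuration, here is a minimal sketch of turning it on from code; the `USE_DDTRACE` flag comes from this diff, while the import path is an assumption.

```python
# Sketch only: enable the Datadog tracer shim before using litellm. USE_DDTRACE
# comes from this diff; the module path is an assumption.
import os

os.environ["USE_DDTRACE"] = "true"  # falls back to a no-op tracer if ddtrace isn't installed

from litellm.litellm_core_utils.dd_tracing import tracer  # assumed module path

with tracer.trace("my-llm-span"):  # works with either the real tracer or the no-op one
    pass
```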
@@ -41,6 +41,7 @@ const sidebars = {
 "proxy/deploy",
 "proxy/prod",
 "proxy/cli",
+"proxy/release_cycle",
 "proxy/model_management",
 "proxy/health",
 "proxy/debugging",
@@ -242,6 +243,7 @@ const sidebars = {
 "completion/document_understanding",
 "completion/vision",
 "completion/json_mode",
+"reasoning_content",
 "completion/prompt_caching",
 "completion/predict_outputs",
 "completion/prefix",
@@ -303,6 +305,7 @@ const sidebars = {
 "pass_through/vertex_ai",
 "pass_through/google_ai_studio",
 "pass_through/cohere",
+"pass_through/openai_passthrough",
 "pass_through/anthropic_completion",
 "pass_through/bedrock",
 "pass_through/assembly_ai",
@@ -53,6 +53,7 @@ from litellm.constants import (
 cohere_embedding_models,
 bedrock_embedding_models,
 known_tokenizer_config,
+BEDROCK_INVOKE_PROVIDERS_LITERAL,
 )
 from litellm.types.guardrails import GuardrailItem
 from litellm.proxy._types import (
@@ -361,17 +362,7 @@ BEDROCK_CONVERSE_MODELS = [
 "meta.llama3-2-11b-instruct-v1:0",
 "meta.llama3-2-90b-instruct-v1:0",
 ]
-BEDROCK_INVOKE_PROVIDERS_LITERAL = Literal[
-    "cohere",
-    "anthropic",
-    "mistral",
-    "amazon",
-    "meta",
-    "llama",
-    "ai21",
-    "nova",
-    "deepseek_r1",
-]
 ####### COMPLETION MODELS ###################
 open_ai_chat_completion_models: List = []
 open_ai_text_completion_models: List = []
@ -13,26 +13,14 @@ import json
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Any, Dict, List, Optional, Set, Union
|
from typing import Any, Dict, List, Optional, Union
|
||||||
|
|
||||||
from openai.types.audio.transcription_create_params import TranscriptionCreateParams
|
|
||||||
from openai.types.chat.completion_create_params import (
|
|
||||||
CompletionCreateParamsNonStreaming,
|
|
||||||
CompletionCreateParamsStreaming,
|
|
||||||
)
|
|
||||||
from openai.types.completion_create_params import (
|
|
||||||
CompletionCreateParamsNonStreaming as TextCompletionCreateParamsNonStreaming,
|
|
||||||
)
|
|
||||||
from openai.types.completion_create_params import (
|
|
||||||
CompletionCreateParamsStreaming as TextCompletionCreateParamsStreaming,
|
|
||||||
)
|
|
||||||
from openai.types.embedding_create_params import EmbeddingCreateParams
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
from litellm._logging import verbose_logger
|
from litellm._logging import verbose_logger
|
||||||
|
from litellm.litellm_core_utils.model_param_helper import ModelParamHelper
|
||||||
from litellm.types.caching import *
|
from litellm.types.caching import *
|
||||||
from litellm.types.rerank import RerankRequest
|
|
||||||
from litellm.types.utils import all_litellm_params
|
from litellm.types.utils import all_litellm_params
|
||||||
|
|
||||||
from .base_cache import BaseCache
|
from .base_cache import BaseCache
|
||||||
|
@ -257,7 +245,7 @@ class Cache:
|
||||||
verbose_logger.debug("\nReturning preset cache key: %s", preset_cache_key)
|
verbose_logger.debug("\nReturning preset cache key: %s", preset_cache_key)
|
||||||
return preset_cache_key
|
return preset_cache_key
|
||||||
|
|
||||||
combined_kwargs = self._get_relevant_args_to_use_for_cache_key()
|
combined_kwargs = ModelParamHelper._get_all_llm_api_params()
|
||||||
litellm_param_kwargs = all_litellm_params
|
litellm_param_kwargs = all_litellm_params
|
||||||
for param in kwargs:
|
for param in kwargs:
|
||||||
if param in combined_kwargs:
|
if param in combined_kwargs:
|
||||||
|
@ -364,76 +352,6 @@ class Cache:
|
||||||
if "litellm_params" in kwargs:
|
if "litellm_params" in kwargs:
|
||||||
kwargs["litellm_params"]["preset_cache_key"] = preset_cache_key
|
kwargs["litellm_params"]["preset_cache_key"] = preset_cache_key
|
||||||
|
|
||||||
def _get_relevant_args_to_use_for_cache_key(self) -> Set[str]:
|
|
||||||
"""
|
|
||||||
Gets the supported kwargs for each call type and combines them
|
|
||||||
"""
|
|
||||||
chat_completion_kwargs = self._get_litellm_supported_chat_completion_kwargs()
|
|
||||||
text_completion_kwargs = self._get_litellm_supported_text_completion_kwargs()
|
|
||||||
embedding_kwargs = self._get_litellm_supported_embedding_kwargs()
|
|
||||||
transcription_kwargs = self._get_litellm_supported_transcription_kwargs()
|
|
||||||
rerank_kwargs = self._get_litellm_supported_rerank_kwargs()
|
|
||||||
exclude_kwargs = self._get_kwargs_to_exclude_from_cache_key()
|
|
||||||
|
|
||||||
combined_kwargs = chat_completion_kwargs.union(
|
|
||||||
text_completion_kwargs,
|
|
||||||
embedding_kwargs,
|
|
||||||
transcription_kwargs,
|
|
||||||
rerank_kwargs,
|
|
||||||
)
|
|
||||||
combined_kwargs = combined_kwargs.difference(exclude_kwargs)
|
|
||||||
return combined_kwargs
|
|
||||||
|
|
||||||
def _get_litellm_supported_chat_completion_kwargs(self) -> Set[str]:
|
|
||||||
"""
|
|
||||||
Get the litellm supported chat completion kwargs
|
|
||||||
|
|
||||||
This follows the OpenAI API Spec
|
|
||||||
"""
|
|
||||||
all_chat_completion_kwargs = set(
|
|
||||||
CompletionCreateParamsNonStreaming.__annotations__.keys()
|
|
||||||
).union(set(CompletionCreateParamsStreaming.__annotations__.keys()))
|
|
||||||
return all_chat_completion_kwargs
|
|
||||||
|
|
||||||
def _get_litellm_supported_text_completion_kwargs(self) -> Set[str]:
|
|
||||||
"""
|
|
||||||
Get the litellm supported text completion kwargs
|
|
||||||
|
|
||||||
This follows the OpenAI API Spec
|
|
||||||
"""
|
|
||||||
all_text_completion_kwargs = set(
|
|
||||||
TextCompletionCreateParamsNonStreaming.__annotations__.keys()
|
|
||||||
).union(set(TextCompletionCreateParamsStreaming.__annotations__.keys()))
|
|
||||||
return all_text_completion_kwargs
|
|
||||||
|
|
||||||
def _get_litellm_supported_rerank_kwargs(self) -> Set[str]:
|
|
||||||
"""
|
|
||||||
Get the litellm supported rerank kwargs
|
|
||||||
"""
|
|
||||||
return set(RerankRequest.model_fields.keys())
|
|
||||||
|
|
||||||
def _get_litellm_supported_embedding_kwargs(self) -> Set[str]:
|
|
||||||
"""
|
|
||||||
Get the litellm supported embedding kwargs
|
|
||||||
|
|
||||||
This follows the OpenAI API Spec
|
|
||||||
"""
|
|
||||||
return set(EmbeddingCreateParams.__annotations__.keys())
|
|
||||||
|
|
||||||
def _get_litellm_supported_transcription_kwargs(self) -> Set[str]:
|
|
||||||
"""
|
|
||||||
Get the litellm supported transcription kwargs
|
|
||||||
|
|
||||||
This follows the OpenAI API Spec
|
|
||||||
"""
|
|
||||||
return set(TranscriptionCreateParams.__annotations__.keys())
|
|
||||||
|
|
||||||
def _get_kwargs_to_exclude_from_cache_key(self) -> Set[str]:
|
|
||||||
"""
|
|
||||||
Get the kwargs to exclude from the cache key
|
|
||||||
"""
|
|
||||||
return set(["metadata"])
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_hashed_cache_key(cache_key: str) -> str:
|
def _get_hashed_cache_key(cache_key: str) -> str:
|
||||||
"""
|
"""
|
||||||
|
|
|
@@ -1,4 +1,4 @@
-from typing import List
+from typing import List, Literal

 ROUTER_MAX_FALLBACKS = 5
 DEFAULT_BATCH_SIZE = 512
@@ -120,6 +120,7 @@ OPENAI_CHAT_COMPLETION_PARAMS = [
 "top_logprobs",
 "reasoning_effort",
 "extra_headers",
+"thinking",
 ]

 openai_compatible_endpoints: List = [
@@ -319,6 +320,17 @@ baseten_models: List = [
 "31dxrj3",
 ] # FALCON 7B # WizardLM # Mosaic ML

+BEDROCK_INVOKE_PROVIDERS_LITERAL = Literal[
+    "cohere",
+    "anthropic",
+    "mistral",
+    "amazon",
+    "meta",
+    "llama",
+    "ai21",
+    "nova",
+    "deepseek_r1",
+]

 open_ai_embedding_models: List = ["text-embedding-ada-002"]
 cohere_embedding_models: List = [
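With `BEDROCK_INVOKE_PROVIDERS_LITERAL` now defined in `litellm.constants`, downstream code can import and enumerate it; the validation helper below is illustrative and not part of the diff.

```python
# Sketch only: consuming the relocated literal. The helper function is illustrative.
from typing import get_args

from litellm.constants import BEDROCK_INVOKE_PROVIDERS_LITERAL


def is_supported_invoke_provider(provider: str) -> bool:
    # typing.get_args() returns the allowed values of a Literal type
    return provider in get_args(BEDROCK_INVOKE_PROVIDERS_LITERAL)


print(is_supported_invoke_provider("deepseek_r1"))  # True
print(is_supported_invoke_provider("unknown"))      # False
```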
@@ -577,6 +577,4 @@ class DataDogLogger(
     start_time_utc: Optional[datetimeObj],
     end_time_utc: Optional[datetimeObj],
 ) -> Optional[dict]:
-    raise NotImplementedError(
-        "Datdog Integration for getting request/response payloads not implemented as yet"
-    )
+    pass
@ -5,49 +5,69 @@ If the ddtrace package is not installed, the tracer will be a no-op.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
|
from typing import TYPE_CHECKING, Any, Union
|
||||||
|
|
||||||
try:
|
from litellm.secret_managers.main import get_secret_bool
|
||||||
from ddtrace import tracer as dd_tracer
|
|
||||||
|
|
||||||
has_ddtrace = True
|
if TYPE_CHECKING:
|
||||||
except ImportError:
|
from ddtrace.tracer import Tracer as DD_TRACER
|
||||||
has_ddtrace = False
|
else:
|
||||||
|
DD_TRACER = Any
|
||||||
|
|
||||||
@contextmanager
|
|
||||||
def null_tracer(name, **kwargs):
|
|
||||||
class NullSpan:
|
|
||||||
def __enter__(self):
|
|
||||||
return self
|
|
||||||
|
|
||||||
def __exit__(self, *args):
|
class NullSpan:
|
||||||
pass
|
"""A no-op span implementation."""
|
||||||
|
|
||||||
def finish(self):
|
def __enter__(self):
|
||||||
pass
|
return self
|
||||||
|
|
||||||
yield NullSpan()
|
def __exit__(self, *args):
|
||||||
|
pass
|
||||||
|
|
||||||
class NullTracer:
|
def finish(self):
|
||||||
def trace(self, name, **kwargs):
|
pass
|
||||||
class NullSpan:
|
|
||||||
def __enter__(self):
|
|
||||||
return self
|
|
||||||
|
|
||||||
def __exit__(self, *args):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def finish(self):
|
@contextmanager
|
||||||
pass
|
def null_tracer(name, **kwargs):
|
||||||
|
"""Context manager that yields a no-op span."""
|
||||||
|
yield NullSpan()
|
||||||
|
|
||||||
return NullSpan()
|
|
||||||
|
|
||||||
def wrap(self, name=None, **kwargs):
|
class NullTracer:
|
||||||
def decorator(f):
|
"""A no-op tracer implementation."""
|
||||||
return f
|
|
||||||
|
|
||||||
return decorator
|
def trace(self, name, **kwargs):
|
||||||
|
return NullSpan()
|
||||||
|
|
||||||
dd_tracer = NullTracer()
|
def wrap(self, name=None, **kwargs):
|
||||||
|
# If called with no arguments (as @tracer.wrap())
|
||||||
|
if callable(name):
|
||||||
|
return name
|
||||||
|
|
||||||
# Export the tracer instance
|
# If called with arguments (as @tracer.wrap(name="something"))
|
||||||
tracer = dd_tracer
|
def decorator(f):
|
||||||
|
return f
|
||||||
|
|
||||||
|
return decorator
|
||||||
|
|
||||||
|
|
||||||
|
def _should_use_dd_tracer():
|
||||||
|
"""Returns True if `USE_DDTRACE` is set to True in .env"""
|
||||||
|
return get_secret_bool("USE_DDTRACE", False) is True
|
||||||
|
|
||||||
|
|
||||||
|
# Initialize tracer
|
||||||
|
should_use_dd_tracer = _should_use_dd_tracer()
|
||||||
|
tracer: Union[NullTracer, DD_TRACER] = NullTracer()
|
||||||
|
# We need to ensure tracer is never None and always has the required methods
|
||||||
|
if should_use_dd_tracer:
|
||||||
|
try:
|
||||||
|
from ddtrace import tracer as dd_tracer
|
||||||
|
|
||||||
|
# Define the type to match what's expected by the code using this module
|
||||||
|
tracer = dd_tracer
|
||||||
|
except ImportError:
|
||||||
|
tracer = NullTracer()
|
||||||
|
else:
|
||||||
|
tracer = NullTracer()
|
||||||
|
|
|
@@ -278,6 +278,7 @@ def exception_type(  # type: ignore # noqa: PLR0915
 "This model's maximum context length is" in error_str
 or "string too long. Expected a string with maximum length"
 in error_str
+or "model's maximum context limit" in error_str
 ):
 exception_mapping_worked = True
 raise ContextWindowExceededError(
@@ -692,6 +693,13 @@ def exception_type(  # type: ignore # noqa: PLR0915
 response=getattr(original_exception, "response", None),
 litellm_debug_info=extra_information,
 )
+elif "model's maximum context limit" in error_str:
+    exception_mapping_worked = True
+    raise ContextWindowExceededError(
+        message=f"{custom_llm_provider}Exception: Context Window Error - {error_str}",
+        model=model,
+        llm_provider=custom_llm_provider,
+    )
 elif "token_quota_reached" in error_str:
 exception_mapping_worked = True
 raise RateLimitError(
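The practical effect of this mapping is that callers can catch a typed exception instead of string-matching the provider error. A rough sketch follows; the model name and the exception's attribute names are assumptions.

```python
# Sketch only: provider errors mentioning "model's maximum context limit" now
# surface as ContextWindowExceededError. Model name and attributes are assumptions.
import litellm

try:
    litellm.completion(
        model="gpt-4o-mini",  # placeholder model
        messages=[{"role": "user", "content": "very long prompt " * 100000}],
    )
except litellm.ContextWindowExceededError as e:
    # branch on the typed exception, e.g. to truncate and retry
    print(f"context window exceeded ({getattr(e, 'llm_provider', '?')}): {e}")
```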
@@ -75,7 +75,7 @@ def get_litellm_params(
 "model_info": model_info,
 "proxy_server_request": proxy_server_request,
 "preset_cache_key": preset_cache_key,
-"no-log": no_log,
+"no-log": no_log or kwargs.get("no-log"),
 "stream_response": {},  # litellm_call_id: ModelResponse Dict
 "input_cost_per_token": input_cost_per_token,
 "input_cost_per_second": input_cost_per_second,
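The `kwargs.get("no-log")` fallback means the hyphenated flag passed directly by callers is now respected; a minimal sketch, assuming an embedding call with a placeholder model:

```python
# Sketch only: disable logging for a single request via the hyphenated
# `no-log` flag (dict splat because the key isn't a valid Python identifier).
import litellm

litellm.embedding(
    model="text-embedding-ada-002",  # placeholder model
    input=["do not log this request"],
    **{"no-log": True},
)
```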
@ -3,7 +3,6 @@
|
||||||
# Logging function -> log the exact model details + what's being sent | Non-Blocking
|
# Logging function -> log the exact model details + what's being sent | Non-Blocking
|
||||||
import copy
|
import copy
|
||||||
import datetime
|
import datetime
|
||||||
from functools import lru_cache
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
@ -13,6 +12,7 @@ import time
|
||||||
import traceback
|
import traceback
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import datetime as dt_object
|
from datetime import datetime as dt_object
|
||||||
|
from functools import lru_cache
|
||||||
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union, cast
|
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union, cast
|
||||||
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
@ -33,6 +33,7 @@ from litellm.integrations.custom_logger import CustomLogger
|
||||||
from litellm.integrations.mlflow import MlflowLogger
|
from litellm.integrations.mlflow import MlflowLogger
|
||||||
from litellm.integrations.pagerduty.pagerduty import PagerDutyAlerting
|
from litellm.integrations.pagerduty.pagerduty import PagerDutyAlerting
|
||||||
from litellm.litellm_core_utils.get_litellm_params import get_litellm_params
|
from litellm.litellm_core_utils.get_litellm_params import get_litellm_params
|
||||||
|
from litellm.litellm_core_utils.model_param_helper import ModelParamHelper
|
||||||
from litellm.litellm_core_utils.redact_messages import (
|
from litellm.litellm_core_utils.redact_messages import (
|
||||||
redact_message_input_output_from_custom_logger,
|
redact_message_input_output_from_custom_logger,
|
||||||
redact_message_input_output_from_logging,
|
redact_message_input_output_from_logging,
|
||||||
|
@ -2513,15 +2514,19 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
|
||||||
|
|
||||||
# auth can be disabled on local deployments of arize phoenix
|
# auth can be disabled on local deployments of arize phoenix
|
||||||
if arize_phoenix_config.otlp_auth_headers is not None:
|
if arize_phoenix_config.otlp_auth_headers is not None:
|
||||||
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = arize_phoenix_config.otlp_auth_headers
|
os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
|
||||||
|
arize_phoenix_config.otlp_auth_headers
|
||||||
|
)
|
||||||
|
|
||||||
for callback in _in_memory_loggers:
|
for callback in _in_memory_loggers:
|
||||||
if (
|
if (
|
||||||
isinstance(callback, OpenTelemetry)
|
isinstance(callback, OpenTelemetry)
|
||||||
and callback.callback_name == "arize_phoenix"
|
and callback.callback_name == "arize_phoenix"
|
||||||
):
|
):
|
||||||
return callback # type: ignore
|
return callback # type: ignore
|
||||||
_otel_logger = OpenTelemetry(config=otel_config, callback_name="arize_phoenix")
|
_otel_logger = OpenTelemetry(
|
||||||
|
config=otel_config, callback_name="arize_phoenix"
|
||||||
|
)
|
||||||
_in_memory_loggers.append(_otel_logger)
|
_in_memory_loggers.append(_otel_logger)
|
||||||
return _otel_logger # type: ignore
|
return _otel_logger # type: ignore
|
||||||
elif logging_integration == "otel":
|
elif logging_integration == "otel":
|
||||||
|
@ -3110,10 +3115,26 @@ class StandardLoggingPayloadSetup:
|
||||||
str(original_exception.__class__.__name__) if original_exception else ""
|
str(original_exception.__class__.__name__) if original_exception else ""
|
||||||
)
|
)
|
||||||
_llm_provider_in_exception = getattr(original_exception, "llm_provider", "")
|
_llm_provider_in_exception = getattr(original_exception, "llm_provider", "")
|
||||||
|
|
||||||
|
# Get traceback information (first 100 lines)
|
||||||
|
traceback_info = ""
|
||||||
|
if original_exception:
|
||||||
|
tb = getattr(original_exception, "__traceback__", None)
|
||||||
|
if tb:
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
tb_lines = traceback.format_tb(tb)
|
||||||
|
traceback_info = "".join(tb_lines[:100]) # Limit to first 100 lines
|
||||||
|
|
||||||
|
# Get additional error details
|
||||||
|
error_message = str(original_exception)
|
||||||
|
|
||||||
return StandardLoggingPayloadErrorInformation(
|
return StandardLoggingPayloadErrorInformation(
|
||||||
error_code=error_status,
|
error_code=error_status,
|
||||||
error_class=error_class,
|
error_class=error_class,
|
||||||
llm_provider=_llm_provider_in_exception,
|
llm_provider=_llm_provider_in_exception,
|
||||||
|
traceback=traceback_info,
|
||||||
|
error_message=error_message if original_exception else "",
|
||||||
)
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -3310,7 +3331,9 @@ def get_standard_logging_object_payload(
|
||||||
requester_ip_address=clean_metadata.get("requester_ip_address", None),
|
requester_ip_address=clean_metadata.get("requester_ip_address", None),
|
||||||
messages=kwargs.get("messages"),
|
messages=kwargs.get("messages"),
|
||||||
response=final_response_obj,
|
response=final_response_obj,
|
||||||
model_parameters=kwargs.get("optional_params", None),
|
model_parameters=ModelParamHelper.get_standard_logging_model_parameters(
|
||||||
|
kwargs.get("optional_params", None) or {}
|
||||||
|
),
|
||||||
hidden_params=clean_hidden_params,
|
hidden_params=clean_hidden_params,
|
||||||
model_map_information=model_cost_information,
|
model_map_information=model_cost_information,
|
||||||
error_str=error_str,
|
error_str=error_str,
|
||||||
|
|
|
@@ -473,6 +473,7 @@ def convert_to_model_response_object(  # noqa: PLR0915
 tool_calls=tool_calls,
 audio=choice["message"].get("audio", None),
 provider_specific_fields=provider_specific_fields,
+reasoning_content=reasoning_content,
 )
 finish_reason = choice.get("finish_reason", None)
 if finish_reason is None:
133
litellm/litellm_core_utils/model_param_helper.py
Normal file
133
litellm/litellm_core_utils/model_param_helper.py
Normal file
|
@ -0,0 +1,133 @@
|
||||||
|
from typing import Set
|
||||||
|
|
||||||
|
from openai.types.audio.transcription_create_params import TranscriptionCreateParams
|
||||||
|
from openai.types.chat.completion_create_params import (
|
||||||
|
CompletionCreateParamsNonStreaming,
|
||||||
|
CompletionCreateParamsStreaming,
|
||||||
|
)
|
||||||
|
from openai.types.completion_create_params import (
|
||||||
|
CompletionCreateParamsNonStreaming as TextCompletionCreateParamsNonStreaming,
|
||||||
|
)
|
||||||
|
from openai.types.completion_create_params import (
|
||||||
|
CompletionCreateParamsStreaming as TextCompletionCreateParamsStreaming,
|
||||||
|
)
|
||||||
|
from openai.types.embedding_create_params import EmbeddingCreateParams
|
||||||
|
|
||||||
|
from litellm.types.rerank import RerankRequest
|
||||||
|
|
||||||
|
|
||||||
|
class ModelParamHelper:
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_standard_logging_model_parameters(
|
||||||
|
model_parameters: dict,
|
||||||
|
) -> dict:
|
||||||
|
""" """
|
||||||
|
standard_logging_model_parameters: dict = {}
|
||||||
|
supported_model_parameters = (
|
||||||
|
ModelParamHelper._get_relevant_args_to_use_for_logging()
|
||||||
|
)
|
||||||
|
|
||||||
|
for key, value in model_parameters.items():
|
||||||
|
if key in supported_model_parameters:
|
||||||
|
standard_logging_model_parameters[key] = value
|
||||||
|
return standard_logging_model_parameters
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_exclude_params_for_model_parameters() -> Set[str]:
|
||||||
|
return set(["messages", "prompt", "input"])
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_relevant_args_to_use_for_logging() -> Set[str]:
|
||||||
|
"""
|
||||||
|
Gets all relevant llm api params besides the ones with prompt content
|
||||||
|
"""
|
||||||
|
all_openai_llm_api_params = ModelParamHelper._get_all_llm_api_params()
|
||||||
|
# Exclude parameters that contain prompt content
|
||||||
|
combined_kwargs = all_openai_llm_api_params.difference(
|
||||||
|
set(ModelParamHelper.get_exclude_params_for_model_parameters())
|
||||||
|
)
|
||||||
|
return combined_kwargs
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_all_llm_api_params() -> Set[str]:
|
||||||
|
"""
|
||||||
|
Gets the supported kwargs for each call type and combines them
|
||||||
|
"""
|
||||||
|
chat_completion_kwargs = (
|
||||||
|
ModelParamHelper._get_litellm_supported_chat_completion_kwargs()
|
||||||
|
)
|
||||||
|
text_completion_kwargs = (
|
||||||
|
ModelParamHelper._get_litellm_supported_text_completion_kwargs()
|
||||||
|
)
|
||||||
|
embedding_kwargs = ModelParamHelper._get_litellm_supported_embedding_kwargs()
|
||||||
|
transcription_kwargs = (
|
||||||
|
ModelParamHelper._get_litellm_supported_transcription_kwargs()
|
||||||
|
)
|
||||||
|
rerank_kwargs = ModelParamHelper._get_litellm_supported_rerank_kwargs()
|
||||||
|
exclude_kwargs = ModelParamHelper._get_exclude_kwargs()
|
||||||
|
|
||||||
|
combined_kwargs = chat_completion_kwargs.union(
|
||||||
|
text_completion_kwargs,
|
||||||
|
embedding_kwargs,
|
||||||
|
transcription_kwargs,
|
||||||
|
rerank_kwargs,
|
||||||
|
)
|
||||||
|
combined_kwargs = combined_kwargs.difference(exclude_kwargs)
|
||||||
|
return combined_kwargs
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_litellm_supported_chat_completion_kwargs() -> Set[str]:
|
||||||
|
"""
|
||||||
|
Get the litellm supported chat completion kwargs
|
||||||
|
|
||||||
|
This follows the OpenAI API Spec
|
||||||
|
"""
|
||||||
|
all_chat_completion_kwargs = set(
|
||||||
|
CompletionCreateParamsNonStreaming.__annotations__.keys()
|
||||||
|
).union(set(CompletionCreateParamsStreaming.__annotations__.keys()))
|
||||||
|
return all_chat_completion_kwargs
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_litellm_supported_text_completion_kwargs() -> Set[str]:
|
||||||
|
"""
|
||||||
|
Get the litellm supported text completion kwargs
|
||||||
|
|
||||||
|
This follows the OpenAI API Spec
|
||||||
|
"""
|
||||||
|
all_text_completion_kwargs = set(
|
||||||
|
TextCompletionCreateParamsNonStreaming.__annotations__.keys()
|
||||||
|
).union(set(TextCompletionCreateParamsStreaming.__annotations__.keys()))
|
||||||
|
return all_text_completion_kwargs
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_litellm_supported_rerank_kwargs() -> Set[str]:
|
||||||
|
"""
|
||||||
|
Get the litellm supported rerank kwargs
|
||||||
|
"""
|
||||||
|
return set(RerankRequest.model_fields.keys())
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_litellm_supported_embedding_kwargs() -> Set[str]:
|
||||||
|
"""
|
||||||
|
Get the litellm supported embedding kwargs
|
||||||
|
|
||||||
|
This follows the OpenAI API Spec
|
||||||
|
"""
|
||||||
|
return set(EmbeddingCreateParams.__annotations__.keys())
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_litellm_supported_transcription_kwargs() -> Set[str]:
|
||||||
|
"""
|
||||||
|
Get the litellm supported transcription kwargs
|
||||||
|
|
||||||
|
This follows the OpenAI API Spec
|
||||||
|
"""
|
||||||
|
return set(TranscriptionCreateParams.__annotations__.keys())
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_exclude_kwargs() -> Set[str]:
|
||||||
|
"""
|
||||||
|
Get the kwargs to exclude from the cache key
|
||||||
|
"""
|
||||||
|
return set(["metadata"])
|
|
@ -2151,6 +2151,10 @@ from email.message import Message
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
|
from litellm.types.llms.bedrock import (
|
||||||
|
BedrockConverseReasoningContentBlock,
|
||||||
|
BedrockConverseReasoningTextBlock,
|
||||||
|
)
|
||||||
from litellm.types.llms.bedrock import ContentBlock as BedrockContentBlock
|
from litellm.types.llms.bedrock import ContentBlock as BedrockContentBlock
|
||||||
from litellm.types.llms.bedrock import DocumentBlock as BedrockDocumentBlock
|
from litellm.types.llms.bedrock import DocumentBlock as BedrockDocumentBlock
|
||||||
from litellm.types.llms.bedrock import ImageBlock as BedrockImageBlock
|
from litellm.types.llms.bedrock import ImageBlock as BedrockImageBlock
|
||||||
|
@ -2963,6 +2967,28 @@ class BedrockConverseMessagesProcessor:
|
||||||
|
|
||||||
return contents
|
return contents
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def translate_thinking_blocks_to_reasoning_content_blocks(
|
||||||
|
thinking_blocks: List[ChatCompletionThinkingBlock],
|
||||||
|
) -> List[BedrockContentBlock]:
|
||||||
|
reasoning_content_blocks: List[BedrockContentBlock] = []
|
||||||
|
for thinking_block in thinking_blocks:
|
||||||
|
reasoning_text = thinking_block.get("thinking")
|
||||||
|
reasoning_signature = thinking_block.get("signature_delta")
|
||||||
|
text_block = BedrockConverseReasoningTextBlock(
|
||||||
|
text=reasoning_text or "",
|
||||||
|
)
|
||||||
|
if reasoning_signature is not None:
|
||||||
|
text_block["signature"] = reasoning_signature
|
||||||
|
reasoning_content_block = BedrockConverseReasoningContentBlock(
|
||||||
|
reasoningText=text_block,
|
||||||
|
)
|
||||||
|
bedrock_content_block = BedrockContentBlock(
|
||||||
|
reasoningContent=reasoning_content_block
|
||||||
|
)
|
||||||
|
reasoning_content_blocks.append(bedrock_content_block)
|
||||||
|
return reasoning_content_blocks
|
||||||
|
|
||||||
|
|
||||||
def _bedrock_converse_messages_pt( # noqa: PLR0915
|
def _bedrock_converse_messages_pt( # noqa: PLR0915
|
||||||
messages: List,
|
messages: List,
|
||||||
|
@ -3109,11 +3135,23 @@ def _bedrock_converse_messages_pt( # noqa: PLR0915
|
||||||
assistant_content: List[BedrockContentBlock] = []
|
assistant_content: List[BedrockContentBlock] = []
|
||||||
## MERGE CONSECUTIVE ASSISTANT CONTENT ##
|
## MERGE CONSECUTIVE ASSISTANT CONTENT ##
|
||||||
while msg_i < len(messages) and messages[msg_i]["role"] == "assistant":
|
while msg_i < len(messages) and messages[msg_i]["role"] == "assistant":
|
||||||
|
|
||||||
assistant_message_block = get_assistant_message_block_or_continue_message(
|
assistant_message_block = get_assistant_message_block_or_continue_message(
|
||||||
message=messages[msg_i],
|
message=messages[msg_i],
|
||||||
assistant_continue_message=assistant_continue_message,
|
assistant_continue_message=assistant_continue_message,
|
||||||
)
|
)
|
||||||
_assistant_content = assistant_message_block.get("content", None)
|
_assistant_content = assistant_message_block.get("content", None)
|
||||||
|
thinking_blocks = cast(
|
||||||
|
Optional[List[ChatCompletionThinkingBlock]],
|
||||||
|
assistant_message_block.get("thinking_blocks"),
|
||||||
|
)
|
||||||
|
|
||||||
|
if thinking_blocks is not None:
|
||||||
|
assistant_content.extend(
|
||||||
|
BedrockConverseMessagesProcessor.translate_thinking_blocks_to_reasoning_content_blocks(
|
||||||
|
thinking_blocks
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
if _assistant_content is not None and isinstance(_assistant_content, list):
|
if _assistant_content is not None and isinstance(_assistant_content, list):
|
||||||
assistants_parts: List[BedrockContentBlock] = []
|
assistants_parts: List[BedrockContentBlock] = []
|
||||||
|
|
|
@ -5,7 +5,7 @@ import threading
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
import uuid
|
import uuid
|
||||||
from typing import Any, Callable, Dict, List, Optional, cast
|
from typing import Any, Callable, Dict, List, Optional, Union, cast
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
@ -14,6 +14,7 @@ import litellm
|
||||||
from litellm import verbose_logger
|
from litellm import verbose_logger
|
||||||
from litellm.litellm_core_utils.redact_messages import LiteLLMLoggingObject
|
from litellm.litellm_core_utils.redact_messages import LiteLLMLoggingObject
|
||||||
from litellm.litellm_core_utils.thread_pool_executor import executor
|
from litellm.litellm_core_utils.thread_pool_executor import executor
|
||||||
|
from litellm.types.llms.openai import ChatCompletionChunk
|
||||||
from litellm.types.utils import Delta
|
from litellm.types.utils import Delta
|
||||||
from litellm.types.utils import GenericStreamingChunk as GChunk
|
from litellm.types.utils import GenericStreamingChunk as GChunk
|
||||||
from litellm.types.utils import (
|
from litellm.types.utils import (
|
||||||
|
@ -110,7 +111,7 @@ class CustomStreamWrapper:
|
||||||
) # GUARANTEE OPENAI HEADERS IN RESPONSE
|
) # GUARANTEE OPENAI HEADERS IN RESPONSE
|
||||||
|
|
||||||
self._response_headers = _response_headers
|
self._response_headers = _response_headers
|
||||||
self.response_id = None
|
self.response_id: Optional[str] = None
|
||||||
self.logging_loop = None
|
self.logging_loop = None
|
||||||
self.rules = Rules()
|
self.rules = Rules()
|
||||||
self.stream_options = stream_options or getattr(
|
self.stream_options = stream_options or getattr(
|
||||||
|
@ -713,7 +714,7 @@ class CustomStreamWrapper:
|
||||||
|
|
||||||
def is_delta_empty(self, delta: Delta) -> bool:
|
def is_delta_empty(self, delta: Delta) -> bool:
|
||||||
is_empty = True
|
is_empty = True
|
||||||
if delta.content is not None:
|
if delta.content:
|
||||||
is_empty = False
|
is_empty = False
|
||||||
elif delta.tool_calls is not None:
|
elif delta.tool_calls is not None:
|
||||||
is_empty = False
|
is_empty = False
|
||||||
|
@ -721,6 +722,39 @@ class CustomStreamWrapper:
|
||||||
is_empty = False
|
is_empty = False
|
||||||
return is_empty
|
return is_empty
|
||||||
|
|
||||||
|
def set_model_id(
|
||||||
|
self, id: str, model_response: ModelResponseStream
|
||||||
|
) -> ModelResponseStream:
|
||||||
|
"""
|
||||||
|
Set the model id and response id to the given id.
|
||||||
|
|
||||||
|
Ensure model id is always the same across all chunks.
|
||||||
|
|
||||||
|
If first chunk sent + id set, use that id for all chunks.
|
||||||
|
"""
|
||||||
|
if self.response_id is None:
|
||||||
|
self.response_id = id
|
||||||
|
if self.response_id is not None and isinstance(self.response_id, str):
|
||||||
|
model_response.id = self.response_id
|
||||||
|
return model_response
|
||||||
|
|
||||||
|
def copy_model_response_level_provider_specific_fields(
|
||||||
|
self,
|
||||||
|
original_chunk: Union[ModelResponseStream, ChatCompletionChunk],
|
||||||
|
model_response: ModelResponseStream,
|
||||||
|
) -> ModelResponseStream:
|
||||||
|
"""
|
||||||
|
Copy provider_specific_fields from original_chunk to model_response.
|
||||||
|
"""
|
||||||
|
provider_specific_fields = getattr(
|
||||||
|
original_chunk, "provider_specific_fields", None
|
||||||
|
)
|
||||||
|
if provider_specific_fields is not None:
|
||||||
|
model_response.provider_specific_fields = provider_specific_fields
|
||||||
|
for k, v in provider_specific_fields.items():
|
||||||
|
setattr(model_response, k, v)
|
||||||
|
return model_response
|
||||||
|
|
||||||
def return_processed_chunk_logic( # noqa
|
def return_processed_chunk_logic( # noqa
|
||||||
self,
|
self,
|
||||||
completion_obj: Dict[str, Any],
|
completion_obj: Dict[str, Any],
|
||||||
|
@ -747,6 +781,10 @@ class CustomStreamWrapper:
|
||||||
and completion_obj["function_call"] is not None
|
and completion_obj["function_call"] is not None
|
||||||
)
|
)
|
||||||
or (model_response.choices[0].delta.provider_specific_fields is not None)
|
or (model_response.choices[0].delta.provider_specific_fields is not None)
|
||||||
|
or (
|
||||||
|
"provider_specific_fields" in model_response
|
||||||
|
and model_response.choices[0].delta.provider_specific_fields is not None
|
||||||
|
)
|
||||||
or (
|
or (
|
||||||
"provider_specific_fields" in response_obj
|
"provider_specific_fields" in response_obj
|
||||||
and response_obj["provider_specific_fields"] is not None
|
and response_obj["provider_specific_fields"] is not None
|
||||||
|
@ -763,8 +801,6 @@ class CustomStreamWrapper:
|
||||||
## check if openai/azure chunk
|
## check if openai/azure chunk
|
||||||
original_chunk = response_obj.get("original_chunk", None)
|
original_chunk = response_obj.get("original_chunk", None)
|
||||||
if original_chunk:
|
if original_chunk:
|
||||||
model_response.id = original_chunk.id
|
|
||||||
self.response_id = original_chunk.id
|
|
||||||
if len(original_chunk.choices) > 0:
|
if len(original_chunk.choices) > 0:
|
||||||
choices = []
|
choices = []
|
||||||
for choice in original_chunk.choices:
|
for choice in original_chunk.choices:
|
||||||
|
@ -798,9 +834,10 @@ class CustomStreamWrapper:
|
||||||
model_response.choices[0].delta, "role"
|
model_response.choices[0].delta, "role"
|
||||||
):
|
):
|
||||||
_initial_delta = model_response.choices[0].delta.model_dump()
|
_initial_delta = model_response.choices[0].delta.model_dump()
|
||||||
|
|
||||||
_initial_delta.pop("role", None)
|
_initial_delta.pop("role", None)
|
||||||
model_response.choices[0].delta = Delta(**_initial_delta)
|
model_response.choices[0].delta = Delta(**_initial_delta)
|
||||||
print_verbose(
|
verbose_logger.debug(
|
||||||
f"model_response.choices[0].delta: {model_response.choices[0].delta}"
|
f"model_response.choices[0].delta: {model_response.choices[0].delta}"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
@ -842,6 +879,9 @@ class CustomStreamWrapper:
|
||||||
_is_delta_empty = self.is_delta_empty(delta=model_response.choices[0].delta)
|
_is_delta_empty = self.is_delta_empty(delta=model_response.choices[0].delta)
|
||||||
|
|
||||||
if _is_delta_empty:
|
if _is_delta_empty:
|
||||||
|
model_response.choices[0].delta = Delta(
|
||||||
|
content=None
|
||||||
|
) # ensure empty delta chunk returned
|
||||||
# get any function call arguments
|
# get any function call arguments
|
||||||
model_response.choices[0].finish_reason = map_finish_reason(
|
model_response.choices[0].finish_reason = map_finish_reason(
|
||||||
finish_reason=self.received_finish_reason
|
finish_reason=self.received_finish_reason
|
||||||
|
@ -870,7 +910,7 @@ class CustomStreamWrapper:
|
||||||
self.chunks.append(model_response)
|
self.chunks.append(model_response)
|
||||||
return
|
return
|
||||||
|
|
||||||
def chunk_creator(self, chunk): # type: ignore # noqa: PLR0915
|
def chunk_creator(self, chunk: Any): # type: ignore # noqa: PLR0915
|
||||||
model_response = self.model_response_creator()
|
model_response = self.model_response_creator()
|
||||||
response_obj: Dict[str, Any] = {}
|
response_obj: Dict[str, Any] = {}
|
||||||
|
|
||||||
|
@ -886,16 +926,13 @@ class CustomStreamWrapper:
|
||||||
) # check if chunk is a generic streaming chunk
|
) # check if chunk is a generic streaming chunk
|
||||||
) or (
|
) or (
|
||||||
self.custom_llm_provider
|
self.custom_llm_provider
|
||||||
and (
|
and self.custom_llm_provider in litellm._custom_providers
|
||||||
self.custom_llm_provider == "anthropic"
|
|
||||||
or self.custom_llm_provider in litellm._custom_providers
|
|
||||||
)
|
|
||||||
):
|
):
|
||||||
|
|
||||||
if self.received_finish_reason is not None:
|
if self.received_finish_reason is not None:
|
||||||
if "provider_specific_fields" not in chunk:
|
if "provider_specific_fields" not in chunk:
|
||||||
raise StopIteration
|
raise StopIteration
|
||||||
anthropic_response_obj: GChunk = chunk
|
anthropic_response_obj: GChunk = cast(GChunk, chunk)
|
||||||
completion_obj["content"] = anthropic_response_obj["text"]
|
completion_obj["content"] = anthropic_response_obj["text"]
|
||||||
if anthropic_response_obj["is_finished"]:
|
if anthropic_response_obj["is_finished"]:
|
||||||
self.received_finish_reason = anthropic_response_obj[
|
self.received_finish_reason = anthropic_response_obj[
|
||||||
|
@ -927,7 +964,7 @@ class CustomStreamWrapper:
|
||||||
].items():
|
].items():
|
||||||
setattr(model_response, key, value)
|
setattr(model_response, key, value)
|
||||||
|
|
||||||
response_obj = anthropic_response_obj
|
response_obj = cast(Dict[str, Any], anthropic_response_obj)
|
||||||
elif self.model == "replicate" or self.custom_llm_provider == "replicate":
|
elif self.model == "replicate" or self.custom_llm_provider == "replicate":
|
||||||
response_obj = self.handle_replicate_chunk(chunk)
|
response_obj = self.handle_replicate_chunk(chunk)
|
||||||
completion_obj["content"] = response_obj["text"]
|
completion_obj["content"] = response_obj["text"]
|
||||||
|
@ -989,6 +1026,7 @@ class CustomStreamWrapper:
|
||||||
try:
|
try:
|
||||||
completion_obj["content"] = chunk.text
|
completion_obj["content"] = chunk.text
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
original_exception = e
|
||||||
if "Part has no text." in str(e):
|
if "Part has no text." in str(e):
|
||||||
## check for function calling
|
## check for function calling
|
||||||
function_call = (
|
function_call = (
|
||||||
|
@ -1030,7 +1068,7 @@ class CustomStreamWrapper:
|
||||||
_model_response.choices = [_streaming_response]
|
_model_response.choices = [_streaming_response]
|
||||||
response_obj = {"original_chunk": _model_response}
|
response_obj = {"original_chunk": _model_response}
|
||||||
else:
|
else:
|
||||||
raise e
|
raise original_exception
|
||||||
if (
|
if (
|
||||||
hasattr(chunk.candidates[0], "finish_reason")
|
hasattr(chunk.candidates[0], "finish_reason")
|
||||||
and chunk.candidates[0].finish_reason.name
|
and chunk.candidates[0].finish_reason.name
|
||||||
|
@ -1093,8 +1131,9 @@ class CustomStreamWrapper:
|
||||||
total_tokens=response_obj["usage"].total_tokens,
|
total_tokens=response_obj["usage"].total_tokens,
|
||||||
)
|
)
|
||||||
elif self.custom_llm_provider == "text-completion-codestral":
|
elif self.custom_llm_provider == "text-completion-codestral":
|
||||||
response_obj = litellm.CodestralTextCompletionConfig()._chunk_parser(
|
response_obj = cast(
|
||||||
chunk
|
Dict[str, Any],
|
||||||
|
litellm.CodestralTextCompletionConfig()._chunk_parser(chunk),
|
||||||
)
|
)
|
||||||
completion_obj["content"] = response_obj["text"]
|
completion_obj["content"] = response_obj["text"]
|
||||||
print_verbose(f"completion obj content: {completion_obj['content']}")
|
print_verbose(f"completion obj content: {completion_obj['content']}")
|
||||||
|
@ -1156,8 +1195,9 @@ class CustomStreamWrapper:
|
||||||
self.received_finish_reason = response_obj["finish_reason"]
|
self.received_finish_reason = response_obj["finish_reason"]
|
||||||
if response_obj.get("original_chunk", None) is not None:
|
if response_obj.get("original_chunk", None) is not None:
|
||||||
if hasattr(response_obj["original_chunk"], "id"):
|
if hasattr(response_obj["original_chunk"], "id"):
|
||||||
model_response.id = response_obj["original_chunk"].id
|
model_response = self.set_model_id(
|
||||||
self.response_id = model_response.id
|
response_obj["original_chunk"].id, model_response
|
||||||
|
)
|
||||||
if hasattr(response_obj["original_chunk"], "system_fingerprint"):
|
if hasattr(response_obj["original_chunk"], "system_fingerprint"):
|
||||||
model_response.system_fingerprint = response_obj[
|
model_response.system_fingerprint = response_obj[
|
||||||
"original_chunk"
|
"original_chunk"
|
||||||
|
@ -1206,8 +1246,16 @@ class CustomStreamWrapper:
|
||||||
): # function / tool calling branch - only set for openai/azure compatible endpoints
|
): # function / tool calling branch - only set for openai/azure compatible endpoints
|
||||||
# enter this branch when no content has been passed in response
|
# enter this branch when no content has been passed in response
|
||||||
original_chunk = response_obj.get("original_chunk", None)
|
original_chunk = response_obj.get("original_chunk", None)
|
||||||
model_response.id = original_chunk.id
|
if hasattr(original_chunk, "id"):
|
||||||
self.response_id = original_chunk.id
|
model_response = self.set_model_id(
|
||||||
|
original_chunk.id, model_response
|
||||||
|
)
|
||||||
|
if hasattr(original_chunk, "provider_specific_fields"):
|
||||||
|
model_response = (
|
||||||
|
self.copy_model_response_level_provider_specific_fields(
|
||||||
|
original_chunk, model_response
|
||||||
|
)
|
||||||
|
)
|
||||||
if original_chunk.choices and len(original_chunk.choices) > 0:
|
if original_chunk.choices and len(original_chunk.choices) > 0:
|
||||||
delta = original_chunk.choices[0].delta
|
delta = original_chunk.choices[0].delta
|
||||||
if delta is not None and (
|
if delta is not None and (
|
||||||
|
|
|
@@ -26,7 +26,7 @@ else:
 class AiohttpOpenAIChatConfig(OpenAILikeChatConfig):
     def get_complete_url(
         self,
-        api_base: str,
+        api_base: Optional[str],
         model: str,
         optional_params: dict,
         stream: Optional[bool] = None,
@@ -35,6 +35,8 @@ class AiohttpOpenAIChatConfig(OpenAILikeChatConfig):
         Ensure - /v1/chat/completions is at the end of the url

         """
+        if api_base is None:
+            api_base = "https://api.openai.com"
         if not api_base.endswith("/chat/completions"):
             api_base += "/chat/completions"
@ -34,7 +34,12 @@ from litellm.types.llms.openai import (
|
||||||
ChatCompletionToolCallChunk,
|
ChatCompletionToolCallChunk,
|
||||||
ChatCompletionUsageBlock,
|
ChatCompletionUsageBlock,
|
||||||
)
|
)
|
||||||
from litellm.types.utils import GenericStreamingChunk
|
from litellm.types.utils import (
|
||||||
|
Delta,
|
||||||
|
GenericStreamingChunk,
|
||||||
|
ModelResponseStream,
|
||||||
|
StreamingChoices,
|
||||||
|
)
|
||||||
from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager
|
from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager
|
||||||
|
|
||||||
from ...base import BaseLLM
|
from ...base import BaseLLM
|
||||||
|
@ -507,7 +512,12 @@ class ModelResponseIterator:
|
||||||
|
|
||||||
return usage_block
|
return usage_block
|
||||||
|
|
||||||
def _content_block_delta_helper(self, chunk: dict):
|
def _content_block_delta_helper(self, chunk: dict) -> Tuple[
|
||||||
|
str,
|
||||||
|
Optional[ChatCompletionToolCallChunk],
|
||||||
|
List[ChatCompletionThinkingBlock],
|
||||||
|
Dict[str, Any],
|
||||||
|
]:
|
||||||
"""
|
"""
|
||||||
Helper function to handle the content block delta
|
Helper function to handle the content block delta
|
||||||
"""
|
"""
|
||||||
|
@ -516,6 +526,7 @@ class ModelResponseIterator:
|
||||||
tool_use: Optional[ChatCompletionToolCallChunk] = None
|
tool_use: Optional[ChatCompletionToolCallChunk] = None
|
||||||
provider_specific_fields = {}
|
provider_specific_fields = {}
|
||||||
content_block = ContentBlockDelta(**chunk) # type: ignore
|
content_block = ContentBlockDelta(**chunk) # type: ignore
|
||||||
|
thinking_blocks: List[ChatCompletionThinkingBlock] = []
|
||||||
self.content_blocks.append(content_block)
|
self.content_blocks.append(content_block)
|
||||||
if "text" in content_block["delta"]:
|
if "text" in content_block["delta"]:
|
||||||
text = content_block["delta"]["text"]
|
text = content_block["delta"]["text"]
|
||||||
|
@ -535,25 +546,41 @@ class ModelResponseIterator:
|
||||||
"thinking" in content_block["delta"]
|
"thinking" in content_block["delta"]
|
||||||
or "signature_delta" == content_block["delta"]
|
or "signature_delta" == content_block["delta"]
|
||||||
):
|
):
|
||||||
provider_specific_fields["thinking_blocks"] = [
|
thinking_blocks = [
|
||||||
ChatCompletionThinkingBlock(
|
ChatCompletionThinkingBlock(
|
||||||
type="thinking",
|
type="thinking",
|
||||||
thinking=content_block["delta"].get("thinking"),
|
thinking=content_block["delta"].get("thinking"),
|
||||||
signature_delta=content_block["delta"].get("signature"),
|
signature_delta=content_block["delta"].get("signature"),
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
return text, tool_use, provider_specific_fields
|
provider_specific_fields["thinking_blocks"] = thinking_blocks
|
||||||
|
return text, tool_use, thinking_blocks, provider_specific_fields
|
||||||
|
|
||||||
def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
|
def _handle_reasoning_content(
|
||||||
|
self, thinking_blocks: List[ChatCompletionThinkingBlock]
|
||||||
|
) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Handle the reasoning content
|
||||||
|
"""
|
||||||
|
reasoning_content = None
|
||||||
|
for block in thinking_blocks:
|
||||||
|
if reasoning_content is None:
|
||||||
|
reasoning_content = ""
|
||||||
|
if "thinking" in block:
|
||||||
|
reasoning_content += block["thinking"]
|
||||||
|
return reasoning_content
|
||||||
|
|
||||||
|
def chunk_parser(self, chunk: dict) -> ModelResponseStream:
|
||||||
try:
|
try:
|
||||||
type_chunk = chunk.get("type", "") or ""
|
type_chunk = chunk.get("type", "") or ""
|
||||||
|
|
||||||
text = ""
|
text = ""
|
||||||
tool_use: Optional[ChatCompletionToolCallChunk] = None
|
tool_use: Optional[ChatCompletionToolCallChunk] = None
|
||||||
is_finished = False
|
|
||||||
finish_reason = ""
|
finish_reason = ""
|
||||||
usage: Optional[ChatCompletionUsageBlock] = None
|
usage: Optional[ChatCompletionUsageBlock] = None
|
||||||
provider_specific_fields: Dict[str, Any] = {}
|
provider_specific_fields: Dict[str, Any] = {}
|
||||||
|
reasoning_content: Optional[str] = None
|
||||||
|
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
|
||||||
|
|
||||||
index = int(chunk.get("index", 0))
|
index = int(chunk.get("index", 0))
|
||||||
if type_chunk == "content_block_delta":
|
if type_chunk == "content_block_delta":
|
||||||
|
@ -561,9 +588,13 @@ class ModelResponseIterator:
|
||||||
Anthropic content chunk
|
Anthropic content chunk
|
||||||
chunk = {'type': 'content_block_delta', 'index': 0, 'delta': {'type': 'text_delta', 'text': 'Hello'}}
|
chunk = {'type': 'content_block_delta', 'index': 0, 'delta': {'type': 'text_delta', 'text': 'Hello'}}
|
||||||
"""
|
"""
|
||||||
text, tool_use, provider_specific_fields = (
|
text, tool_use, thinking_blocks, provider_specific_fields = (
|
||||||
self._content_block_delta_helper(chunk=chunk)
|
self._content_block_delta_helper(chunk=chunk)
|
||||||
)
|
)
|
||||||
|
if thinking_blocks:
|
||||||
|
reasoning_content = self._handle_reasoning_content(
|
||||||
|
thinking_blocks=thinking_blocks
|
||||||
|
)
|
||||||
elif type_chunk == "content_block_start":
|
elif type_chunk == "content_block_start":
|
||||||
"""
|
"""
|
||||||
event: content_block_start
|
event: content_block_start
|
||||||
|
@ -610,7 +641,6 @@ class ModelResponseIterator:
|
||||||
or "stop"
|
or "stop"
|
||||||
)
|
)
|
||||||
usage = self._handle_usage(anthropic_usage_chunk=message_delta["usage"])
|
usage = self._handle_usage(anthropic_usage_chunk=message_delta["usage"])
|
||||||
is_finished = True
|
|
||||||
elif type_chunk == "message_start":
|
elif type_chunk == "message_start":
|
||||||
"""
|
"""
|
||||||
Anthropic
|
Anthropic
|
||||||
|
@ -649,16 +679,27 @@ class ModelResponseIterator:
|
||||||
|
|
||||||
text, tool_use = self._handle_json_mode_chunk(text=text, tool_use=tool_use)
|
text, tool_use = self._handle_json_mode_chunk(text=text, tool_use=tool_use)
|
||||||
|
|
||||||
returned_chunk = GenericStreamingChunk(
|
returned_chunk = ModelResponseStream(
|
||||||
text=text,
|
choices=[
|
||||||
tool_use=tool_use,
|
StreamingChoices(
|
||||||
is_finished=is_finished,
|
index=index,
|
||||||
finish_reason=finish_reason,
|
delta=Delta(
|
||||||
|
content=text,
|
||||||
|
tool_calls=[tool_use] if tool_use is not None else None,
|
||||||
|
provider_specific_fields=(
|
||||||
|
provider_specific_fields
|
||||||
|
if provider_specific_fields
|
||||||
|
else None
|
||||||
|
),
|
||||||
|
thinking_blocks=(
|
||||||
|
thinking_blocks if thinking_blocks else None
|
||||||
|
),
|
||||||
|
reasoning_content=reasoning_content,
|
||||||
|
),
|
||||||
|
finish_reason=finish_reason,
|
||||||
|
)
|
||||||
|
],
|
||||||
usage=usage,
|
usage=usage,
|
||||||
index=index,
|
|
||||||
provider_specific_fields=(
|
|
||||||
provider_specific_fields if provider_specific_fields else None
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return returned_chunk
|
return returned_chunk
|
||||||
|
@ -769,7 +810,7 @@ class ModelResponseIterator:
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}")
|
raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}")
|
||||||
|
|
||||||
def convert_str_chunk_to_generic_chunk(self, chunk: str) -> GenericStreamingChunk:
|
def convert_str_chunk_to_generic_chunk(self, chunk: str) -> ModelResponseStream:
|
||||||
"""
|
"""
|
||||||
Convert a string chunk to a GenericStreamingChunk
|
Convert a string chunk to a GenericStreamingChunk
|
||||||
|
|
||||||
|
@ -789,11 +830,4 @@ class ModelResponseIterator:
|
||||||
data_json = json.loads(str_line[5:])
|
data_json = json.loads(str_line[5:])
|
||||||
return self.chunk_parser(chunk=data_json)
|
return self.chunk_parser(chunk=data_json)
|
||||||
else:
|
else:
|
||||||
return GenericStreamingChunk(
|
return ModelResponseStream()
|
||||||
text="",
|
|
||||||
is_finished=False,
|
|
||||||
finish_reason="",
|
|
||||||
usage=None,
|
|
||||||
index=0,
|
|
||||||
tool_use=None,
|
|
||||||
)
|
|
||||||
|
|
|
@ -23,6 +23,7 @@ from litellm.types.llms.openai import (
|
||||||
AllMessageValues,
|
AllMessageValues,
|
||||||
ChatCompletionCachedContent,
|
ChatCompletionCachedContent,
|
||||||
ChatCompletionSystemMessage,
|
ChatCompletionSystemMessage,
|
||||||
|
ChatCompletionThinkingBlock,
|
||||||
ChatCompletionToolCallChunk,
|
ChatCompletionToolCallChunk,
|
||||||
ChatCompletionToolCallFunctionChunk,
|
ChatCompletionToolCallFunctionChunk,
|
||||||
ChatCompletionToolParam,
|
ChatCompletionToolParam,
|
||||||
|
@ -80,7 +81,7 @@ class AnthropicConfig(BaseConfig):
|
||||||
return super().get_config()
|
return super().get_config()
|
||||||
|
|
||||||
def get_supported_openai_params(self, model: str):
|
def get_supported_openai_params(self, model: str):
|
||||||
return [
|
params = [
|
||||||
"stream",
|
"stream",
|
||||||
"stop",
|
"stop",
|
||||||
"temperature",
|
"temperature",
|
||||||
|
@ -95,6 +96,11 @@ class AnthropicConfig(BaseConfig):
|
||||||
"user",
|
"user",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if "claude-3-7-sonnet" in model:
|
||||||
|
params.append("thinking")
|
||||||
|
|
||||||
|
return params
|
||||||
|
|
||||||
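As a rough illustration of the capability gating above, this standalone sketch (not the litellm API; the base parameter list is abbreviated) appends `thinking` to the supported-parameter list only for models whose name contains `claude-3-7-sonnet`.

```python
from typing import List

BASE_PARAMS: List[str] = ["stream", "stop", "temperature", "top_p", "max_tokens", "tools", "user"]

def supported_openai_params(model: str) -> List[str]:
    """Return the OpenAI-compatible params supported for a given Anthropic model."""
    params = list(BASE_PARAMS)  # copy so the module-level list is never mutated
    if "claude-3-7-sonnet" in model:
        # Extended-thinking models additionally accept a `thinking` config block.
        params.append("thinking")
    return params

assert "thinking" in supported_openai_params("claude-3-7-sonnet-20250219")
assert "thinking" not in supported_openai_params("claude-3-5-sonnet-20240620")
```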
def get_json_schema_from_pydantic_object(
|
def get_json_schema_from_pydantic_object(
|
||||||
self, response_format: Union[Any, Dict, None]
|
self, response_format: Union[Any, Dict, None]
|
||||||
) -> Optional[dict]:
|
) -> Optional[dict]:
|
||||||
|
@ -117,6 +123,7 @@ class AnthropicConfig(BaseConfig):
|
||||||
prompt_caching_set: bool = False,
|
prompt_caching_set: bool = False,
|
||||||
pdf_used: bool = False,
|
pdf_used: bool = False,
|
||||||
is_vertex_request: bool = False,
|
is_vertex_request: bool = False,
|
||||||
|
user_anthropic_beta_headers: Optional[List[str]] = None,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
|
|
||||||
betas = []
|
betas = []
|
||||||
|
@ -133,6 +140,9 @@ class AnthropicConfig(BaseConfig):
|
||||||
"content-type": "application/json",
|
"content-type": "application/json",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if user_anthropic_beta_headers is not None:
|
||||||
|
betas.extend(user_anthropic_beta_headers)
|
||||||
|
|
||||||
# Don't send any beta headers to Vertex, Vertex has failed requests when they are sent
|
# Don't send any beta headers to Vertex, Vertex has failed requests when they are sent
|
||||||
if is_vertex_request is True:
|
if is_vertex_request is True:
|
||||||
pass
|
pass
|
||||||
|
@ -283,18 +293,6 @@ class AnthropicConfig(BaseConfig):
|
||||||
new_stop = new_v
|
new_stop = new_v
|
||||||
return new_stop
|
return new_stop
|
||||||
|
|
||||||
def _add_tools_to_optional_params(
|
|
||||||
self, optional_params: dict, tools: List[AllAnthropicToolsValues]
|
|
||||||
) -> dict:
|
|
||||||
if "tools" not in optional_params:
|
|
||||||
optional_params["tools"] = tools
|
|
||||||
else:
|
|
||||||
optional_params["tools"] = [
|
|
||||||
*optional_params["tools"],
|
|
||||||
*tools,
|
|
||||||
]
|
|
||||||
return optional_params
|
|
||||||
|
|
||||||
def map_openai_params(
|
def map_openai_params(
|
||||||
self,
|
self,
|
||||||
non_default_params: dict,
|
non_default_params: dict,
|
||||||
|
@ -335,6 +333,10 @@ class AnthropicConfig(BaseConfig):
|
||||||
optional_params["top_p"] = value
|
optional_params["top_p"] = value
|
||||||
if param == "response_format" and isinstance(value, dict):
|
if param == "response_format" and isinstance(value, dict):
|
||||||
|
|
||||||
|
ignore_response_format_types = ["text"]
|
||||||
|
if value["type"] in ignore_response_format_types: # value is a no-op
|
||||||
|
continue
|
||||||
|
|
||||||
json_schema: Optional[dict] = None
|
json_schema: Optional[dict] = None
|
||||||
if "response_schema" in value:
|
if "response_schema" in value:
|
||||||
json_schema = value["response_schema"]
|
json_schema = value["response_schema"]
|
||||||
|
@ -358,7 +360,8 @@ class AnthropicConfig(BaseConfig):
|
||||||
optional_params["json_mode"] = True
|
optional_params["json_mode"] = True
|
||||||
if param == "user":
|
if param == "user":
|
||||||
optional_params["metadata"] = {"user_id": value}
|
optional_params["metadata"] = {"user_id": value}
|
||||||
|
if param == "thinking":
|
||||||
|
optional_params["thinking"] = value
|
||||||
return optional_params
|
return optional_params
|
||||||
|
|
||||||
def _create_json_tool_call_for_response_format(
|
def _create_json_tool_call_for_response_format(
|
||||||
|
@ -584,12 +587,14 @@ class AnthropicConfig(BaseConfig):
|
||||||
def extract_response_content(self, completion_response: dict) -> Tuple[
|
def extract_response_content(self, completion_response: dict) -> Tuple[
|
||||||
str,
|
str,
|
||||||
Optional[List[Any]],
|
Optional[List[Any]],
|
||||||
Optional[List[Dict[str, Any]]],
|
Optional[List[ChatCompletionThinkingBlock]],
|
||||||
|
Optional[str],
|
||||||
List[ChatCompletionToolCallChunk],
|
List[ChatCompletionToolCallChunk],
|
||||||
]:
|
]:
|
||||||
text_content = ""
|
text_content = ""
|
||||||
citations: Optional[List[Any]] = None
|
citations: Optional[List[Any]] = None
|
||||||
thinking_blocks: Optional[List[Dict[str, Any]]] = None
|
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
|
||||||
|
reasoning_content: Optional[str] = None
|
||||||
tool_calls: List[ChatCompletionToolCallChunk] = []
|
tool_calls: List[ChatCompletionToolCallChunk] = []
|
||||||
for idx, content in enumerate(completion_response["content"]):
|
for idx, content in enumerate(completion_response["content"]):
|
||||||
if content["type"] == "text":
|
if content["type"] == "text":
|
||||||
|
@ -615,8 +620,13 @@ class AnthropicConfig(BaseConfig):
|
||||||
if content.get("thinking", None) is not None:
|
if content.get("thinking", None) is not None:
|
||||||
if thinking_blocks is None:
|
if thinking_blocks is None:
|
||||||
thinking_blocks = []
|
thinking_blocks = []
|
||||||
thinking_blocks.append(content)
|
thinking_blocks.append(cast(ChatCompletionThinkingBlock, content))
|
||||||
return text_content, citations, thinking_blocks, tool_calls
|
if thinking_blocks is not None:
|
||||||
|
reasoning_content = ""
|
||||||
|
for block in thinking_blocks:
|
||||||
|
if "thinking" in block:
|
||||||
|
reasoning_content += block["thinking"]
|
||||||
|
return text_content, citations, thinking_blocks, reasoning_content, tool_calls
|
||||||
|
|
||||||
def transform_response(
|
def transform_response(
|
||||||
self,
|
self,
|
||||||
|
@ -666,10 +676,11 @@ class AnthropicConfig(BaseConfig):
|
||||||
else:
|
else:
|
||||||
text_content = ""
|
text_content = ""
|
||||||
citations: Optional[List[Any]] = None
|
citations: Optional[List[Any]] = None
|
||||||
thinking_blocks: Optional[List[Dict[str, Any]]] = None
|
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
|
||||||
|
reasoning_content: Optional[str] = None
|
||||||
tool_calls: List[ChatCompletionToolCallChunk] = []
|
tool_calls: List[ChatCompletionToolCallChunk] = []
|
||||||
|
|
||||||
text_content, citations, thinking_blocks, tool_calls = (
|
text_content, citations, thinking_blocks, reasoning_content, tool_calls = (
|
||||||
self.extract_response_content(completion_response=completion_response)
|
self.extract_response_content(completion_response=completion_response)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -680,6 +691,8 @@ class AnthropicConfig(BaseConfig):
|
||||||
"citations": citations,
|
"citations": citations,
|
||||||
"thinking_blocks": thinking_blocks,
|
"thinking_blocks": thinking_blocks,
|
||||||
},
|
},
|
||||||
|
thinking_blocks=thinking_blocks,
|
||||||
|
reasoning_content=reasoning_content,
|
||||||
)
|
)
|
||||||
|
|
||||||
## HANDLE JSON MODE - anthropic returns single function call
|
## HANDLE JSON MODE - anthropic returns single function call
|
||||||
|
@ -774,6 +787,13 @@ class AnthropicConfig(BaseConfig):
|
||||||
headers=cast(httpx.Headers, headers),
|
headers=cast(httpx.Headers, headers),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _get_user_anthropic_beta_headers(
|
||||||
|
self, anthropic_beta_header: Optional[str]
|
||||||
|
) -> Optional[List[str]]:
|
||||||
|
if anthropic_beta_header is None:
|
||||||
|
return None
|
||||||
|
return anthropic_beta_header.split(",")
|
||||||
|
|
||||||
def validate_environment(
|
def validate_environment(
|
||||||
self,
|
self,
|
||||||
headers: dict,
|
headers: dict,
|
||||||
|
@ -794,13 +814,18 @@ class AnthropicConfig(BaseConfig):
|
||||||
prompt_caching_set = self.is_cache_control_set(messages=messages)
|
prompt_caching_set = self.is_cache_control_set(messages=messages)
|
||||||
computer_tool_used = self.is_computer_tool_used(tools=tools)
|
computer_tool_used = self.is_computer_tool_used(tools=tools)
|
||||||
pdf_used = self.is_pdf_used(messages=messages)
|
pdf_used = self.is_pdf_used(messages=messages)
|
||||||
|
user_anthropic_beta_headers = self._get_user_anthropic_beta_headers(
|
||||||
|
anthropic_beta_header=headers.get("anthropic-beta")
|
||||||
|
)
|
||||||
anthropic_headers = self.get_anthropic_headers(
|
anthropic_headers = self.get_anthropic_headers(
|
||||||
computer_tool_used=computer_tool_used,
|
computer_tool_used=computer_tool_used,
|
||||||
prompt_caching_set=prompt_caching_set,
|
prompt_caching_set=prompt_caching_set,
|
||||||
pdf_used=pdf_used,
|
pdf_used=pdf_used,
|
||||||
api_key=api_key,
|
api_key=api_key,
|
||||||
is_vertex_request=optional_params.get("is_vertex_request", False),
|
is_vertex_request=optional_params.get("is_vertex_request", False),
|
||||||
|
user_anthropic_beta_headers=user_anthropic_beta_headers,
|
||||||
)
|
)
|
||||||
|
|
||||||
headers = {**headers, **anthropic_headers}
|
headers = {**headers, **anthropic_headers}
|
||||||
|
|
||||||
return headers
|
return headers
|
||||||
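A minimal sketch of the header flow introduced above, with hypothetical helper names: a caller-supplied `anthropic-beta` header is split on commas and merged with any betas computed internally (prompt caching, computer use, and so on), then emitted as a single comma-joined header. This is an illustration of the idea, not the exact litellm code path.

```python
from typing import Dict, List, Optional

def split_user_betas(anthropic_beta_header: Optional[str]) -> Optional[List[str]]:
    """Split a caller-supplied 'anthropic-beta' header into individual beta flags."""
    if anthropic_beta_header is None:
        return None
    return anthropic_beta_header.split(",")

def build_beta_header(computed_betas: List[str], user_header: Optional[str]) -> Dict[str, str]:
    """Combine internally computed betas with user-requested ones."""
    betas = list(computed_betas)
    user_betas = split_user_betas(user_header)
    if user_betas is not None:
        betas.extend(user_betas)
    return {"anthropic-beta": ",".join(betas)} if betas else {}

print(build_beta_header(["prompt-caching-2024-07-31"], "computer-use-2024-10-22,pdfs-2024-09-25"))
```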
|
|
|
@ -1,4 +1,5 @@
|
||||||
from typing import Any, List, Optional, Tuple, cast
|
from typing import Any, List, Optional, Tuple, cast
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from httpx import Response
|
from httpx import Response
|
||||||
|
@ -28,16 +29,29 @@ class AzureAIStudioConfig(OpenAIConfig):
|
||||||
api_key: Optional[str] = None,
|
api_key: Optional[str] = None,
|
||||||
api_base: Optional[str] = None,
|
api_base: Optional[str] = None,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
if api_base and "services.ai.azure.com" in api_base:
|
if api_base and self._should_use_api_key_header(api_base):
|
||||||
headers["api-key"] = api_key
|
headers["api-key"] = api_key
|
||||||
else:
|
else:
|
||||||
headers["Authorization"] = f"Bearer {api_key}"
|
headers["Authorization"] = f"Bearer {api_key}"
|
||||||
|
|
||||||
return headers
|
return headers
|
||||||
|
|
||||||
|
def _should_use_api_key_header(self, api_base: str) -> bool:
|
||||||
|
"""
|
||||||
|
Returns True if the request should use `api-key` header for authentication.
|
||||||
|
"""
|
||||||
|
parsed_url = urlparse(api_base)
|
||||||
|
host = parsed_url.hostname
|
||||||
|
if host and (
|
||||||
|
host.endswith(".services.ai.azure.com")
|
||||||
|
or host.endswith(".openai.azure.com")
|
||||||
|
):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
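The host check above can be exercised in isolation with nothing but the standard library; the sketch below mirrors the suffix test and shows both authentication styles. The endpoint hostnames are examples only.

```python
from urllib.parse import urlparse

def should_use_api_key_header(api_base: str) -> bool:
    """True when the endpoint expects an `api-key` header instead of a Bearer token."""
    host = urlparse(api_base).hostname
    return bool(
        host
        and (host.endswith(".services.ai.azure.com") or host.endswith(".openai.azure.com"))
    )

def auth_headers(api_base: str, api_key: str) -> dict:
    if should_use_api_key_header(api_base):
        return {"api-key": api_key}
    return {"Authorization": f"Bearer {api_key}"}

# Example endpoints (illustrative):
print(auth_headers("https://my-proj.services.ai.azure.com/models", "sk-..."))  # api-key header
print(auth_headers("https://models.inference.ai.azure.com", "sk-..."))         # Bearer token
```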
def get_complete_url(
|
def get_complete_url(
|
||||||
self,
|
self,
|
||||||
api_base: str,
|
api_base: Optional[str],
|
||||||
model: str,
|
model: str,
|
||||||
optional_params: dict,
|
optional_params: dict,
|
||||||
stream: Optional[bool] = None,
|
stream: Optional[bool] = None,
|
||||||
|
@ -58,6 +72,10 @@ class AzureAIStudioConfig(OpenAIConfig):
|
||||||
- A complete URL string, e.g.,
|
- A complete URL string, e.g.,
|
||||||
"https://litellm8397336933.services.ai.azure.com/models/chat/completions?api-version=2024-05-01-preview"
|
"https://litellm8397336933.services.ai.azure.com/models/chat/completions?api-version=2024-05-01-preview"
|
||||||
"""
|
"""
|
||||||
|
if api_base is None:
|
||||||
|
raise ValueError(
|
||||||
|
f"api_base is required for Azure AI Studio. Please set the api_base parameter. Passed `api_base={api_base}`"
|
||||||
|
)
|
||||||
original_url = httpx.URL(api_base)
|
original_url = httpx.URL(api_base)
|
||||||
|
|
||||||
# Extract api_version or use default
|
# Extract api_version or use default
|
||||||
|
|
|
@ -111,6 +111,19 @@ class BaseConfig(ABC):
|
||||||
"""
|
"""
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def _add_tools_to_optional_params(self, optional_params: dict, tools: List) -> dict:
|
||||||
|
"""
|
||||||
|
Helper util to add tools to optional_params.
|
||||||
|
"""
|
||||||
|
if "tools" not in optional_params:
|
||||||
|
optional_params["tools"] = tools
|
||||||
|
else:
|
||||||
|
optional_params["tools"] = [
|
||||||
|
*optional_params["tools"],
|
||||||
|
*tools,
|
||||||
|
]
|
||||||
|
return optional_params
|
||||||
|
|
||||||
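A quick standalone illustration of why the helper appends rather than assigns: when both a `response_format` tool and user-supplied tools are mapped, the second call must not clobber the first. Plain dicts stand in for litellm's tool param types here.

```python
from typing import List

def add_tools_to_optional_params(optional_params: dict, tools: List[dict]) -> dict:
    """Append tools to optional_params, preserving any tools added earlier."""
    if "tools" not in optional_params:
        optional_params["tools"] = tools
    else:
        optional_params["tools"] = [*optional_params["tools"], *tools]
    return optional_params

params: dict = {}
add_tools_to_optional_params(params, [{"type": "function", "function": {"name": "json_tool_call"}}])
add_tools_to_optional_params(params, [{"type": "function", "function": {"name": "get_weather"}}])
assert [t["function"]["name"] for t in params["tools"]] == ["json_tool_call", "get_weather"]
```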
def translate_developer_role_to_system_role(
|
def translate_developer_role_to_system_role(
|
||||||
self,
|
self,
|
||||||
messages: List[AllMessageValues],
|
messages: List[AllMessageValues],
|
||||||
|
@ -158,6 +171,7 @@ class BaseConfig(ABC):
|
||||||
optional_params: dict,
|
optional_params: dict,
|
||||||
value: dict,
|
value: dict,
|
||||||
is_response_format_supported: bool,
|
is_response_format_supported: bool,
|
||||||
|
enforce_tool_choice: bool = True,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""
|
"""
|
||||||
Follow similar approach to anthropic - translate to a single tool call.
|
Follow similar approach to anthropic - translate to a single tool call.
|
||||||
|
@ -195,9 +209,11 @@ class BaseConfig(ABC):
|
||||||
|
|
||||||
optional_params.setdefault("tools", [])
|
optional_params.setdefault("tools", [])
|
||||||
optional_params["tools"].append(_tool)
|
optional_params["tools"].append(_tool)
|
||||||
optional_params["tool_choice"] = _tool_choice
|
if enforce_tool_choice:
|
||||||
|
optional_params["tool_choice"] = _tool_choice
|
||||||
|
|
||||||
optional_params["json_mode"] = True
|
optional_params["json_mode"] = True
|
||||||
else:
|
elif is_response_format_supported:
|
||||||
optional_params["response_format"] = value
|
optional_params["response_format"] = value
|
||||||
return optional_params
|
return optional_params
|
||||||
|
|
||||||
|
@ -249,7 +265,7 @@ class BaseConfig(ABC):
|
||||||
|
|
||||||
def get_complete_url(
|
def get_complete_url(
|
||||||
self,
|
self,
|
||||||
api_base: str,
|
api_base: Optional[str],
|
||||||
model: str,
|
model: str,
|
||||||
optional_params: dict,
|
optional_params: dict,
|
||||||
stream: Optional[bool] = None,
|
stream: Optional[bool] = None,
|
||||||
|
@ -261,6 +277,8 @@ class BaseConfig(ABC):
|
||||||
|
|
||||||
Some providers need `model` in `api_base`
|
Some providers need `model` in `api_base`
|
||||||
"""
|
"""
|
||||||
|
if api_base is None:
|
||||||
|
raise ValueError("api_base is required")
|
||||||
return api_base
|
return api_base
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
@ -315,6 +333,7 @@ class BaseConfig(ABC):
|
||||||
data: dict,
|
data: dict,
|
||||||
messages: list,
|
messages: list,
|
||||||
client: Optional[AsyncHTTPHandler] = None,
|
client: Optional[AsyncHTTPHandler] = None,
|
||||||
|
json_mode: Optional[bool] = None,
|
||||||
) -> CustomStreamWrapper:
|
) -> CustomStreamWrapper:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@ -328,6 +347,7 @@ class BaseConfig(ABC):
|
||||||
data: dict,
|
data: dict,
|
||||||
messages: list,
|
messages: list,
|
||||||
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
|
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
|
||||||
|
json_mode: Optional[bool] = None,
|
||||||
) -> CustomStreamWrapper:
|
) -> CustomStreamWrapper:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
|
@ -2,13 +2,14 @@ import hashlib
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast, get_args
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from litellm._logging import verbose_logger
|
from litellm._logging import verbose_logger
|
||||||
from litellm.caching.caching import DualCache
|
from litellm.caching.caching import DualCache
|
||||||
|
from litellm.constants import BEDROCK_INVOKE_PROVIDERS_LITERAL
|
||||||
from litellm.litellm_core_utils.dd_tracing import tracer
|
from litellm.litellm_core_utils.dd_tracing import tracer
|
||||||
from litellm.secret_managers.main import get_secret
|
from litellm.secret_managers.main import get_secret
|
||||||
|
|
||||||
|
@ -223,6 +224,60 @@ class BaseAWSLLM:
|
||||||
# Catch any unexpected errors and return None
|
# Catch any unexpected errors and return None
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_provider_from_model_path(
|
||||||
|
model_path: str,
|
||||||
|
) -> Optional[BEDROCK_INVOKE_PROVIDERS_LITERAL]:
|
||||||
|
"""
|
||||||
|
Helper function to get the provider from a model path with format: provider/model-name
|
||||||
|
|
||||||
|
Args:
|
||||||
|
model_path (str): The model path (e.g., 'llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n' or 'anthropic/model-name')
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Optional[str]: The provider name, or None if no valid provider found
|
||||||
|
"""
|
||||||
|
parts = model_path.split("/")
|
||||||
|
if len(parts) >= 1:
|
||||||
|
provider = parts[0]
|
||||||
|
if provider in get_args(BEDROCK_INVOKE_PROVIDERS_LITERAL):
|
||||||
|
return cast(BEDROCK_INVOKE_PROVIDERS_LITERAL, provider)
|
||||||
|
return None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_bedrock_invoke_provider(
|
||||||
|
model: str,
|
||||||
|
) -> Optional[BEDROCK_INVOKE_PROVIDERS_LITERAL]:
|
||||||
|
"""
|
||||||
|
Helper function to get the bedrock provider from the model
|
||||||
|
|
||||||
|
handles 4 scenarios:
|
||||||
|
1. model=invoke/anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic`
|
||||||
|
2. model=anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic`
|
||||||
|
3. model=llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n -> Returns `llama`
|
||||||
|
4. model=us.amazon.nova-pro-v1:0 -> Returns `nova`
|
||||||
|
"""
|
||||||
|
if model.startswith("invoke/"):
|
||||||
|
model = model.replace("invoke/", "", 1)
|
||||||
|
|
||||||
|
_split_model = model.split(".")[0]
|
||||||
|
if _split_model in get_args(BEDROCK_INVOKE_PROVIDERS_LITERAL):
|
||||||
|
return cast(BEDROCK_INVOKE_PROVIDERS_LITERAL, _split_model)
|
||||||
|
|
||||||
|
# If not a known provider, check for pattern with two slashes
|
||||||
|
provider = BaseAWSLLM._get_provider_from_model_path(model)
|
||||||
|
if provider is not None:
|
||||||
|
return provider
|
||||||
|
|
||||||
|
# check if provider == "nova"
|
||||||
|
if "nova" in model:
|
||||||
|
return "nova"
|
||||||
|
else:
|
||||||
|
for provider in get_args(BEDROCK_INVOKE_PROVIDERS_LITERAL):
|
||||||
|
if provider in model:
|
||||||
|
return provider
|
||||||
|
return None
|
||||||
|
|
||||||
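The provider-detection logic added above can be reproduced standalone; the sketch below covers the four documented model-string shapes. The provider literal is trimmed to a few entries for illustration and is not litellm's full `BEDROCK_INVOKE_PROVIDERS_LITERAL`.

```python
from typing import Literal, Optional, cast, get_args

# Abbreviated stand-in for litellm's BEDROCK_INVOKE_PROVIDERS_LITERAL.
ProviderLiteral = Literal["anthropic", "llama", "mistral", "nova", "deepseek_r1"]

def provider_from_model_path(model_path: str) -> Optional[ProviderLiteral]:
    """Handle 'provider/arn:...'-style model paths."""
    provider = model_path.split("/")[0]
    if provider in get_args(ProviderLiteral):
        return cast(ProviderLiteral, provider)
    return None

def get_bedrock_invoke_provider(model: str) -> Optional[ProviderLiteral]:
    if model.startswith("invoke/"):             # 1. invoke/anthropic.claude-...
        model = model.replace("invoke/", "", 1)
    prefix = model.split(".")[0]
    if prefix in get_args(ProviderLiteral):     # 2. anthropic.claude-...
        return cast(ProviderLiteral, prefix)
    path_provider = provider_from_model_path(model)
    if path_provider is not None:               # 3. llama/arn:aws:bedrock:...
        return path_provider
    if "nova" in model:                         # 4. us.amazon.nova-pro-v1:0
        return "nova"
    for provider in get_args(ProviderLiteral):  # fallback: substring match
        if provider in model:
            return provider
    return None

assert get_bedrock_invoke_provider("invoke/anthropic.claude-3-5-sonnet-20240620-v1:0") == "anthropic"
assert get_bedrock_invoke_provider("llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/abc") == "llama"
assert get_bedrock_invoke_provider("us.amazon.nova-pro-v1:0") == "nova"
```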
def _get_aws_region_name(
|
def _get_aws_region_name(
|
||||||
self, optional_params: dict, model: Optional[str] = None
|
self, optional_params: dict, model: Optional[str] = None
|
||||||
) -> str:
|
) -> str:
|
||||||
|
|
|
@ -23,6 +23,7 @@ from litellm.types.llms.openai import (
|
||||||
AllMessageValues,
|
AllMessageValues,
|
||||||
ChatCompletionResponseMessage,
|
ChatCompletionResponseMessage,
|
||||||
ChatCompletionSystemMessage,
|
ChatCompletionSystemMessage,
|
||||||
|
ChatCompletionThinkingBlock,
|
||||||
ChatCompletionToolCallChunk,
|
ChatCompletionToolCallChunk,
|
||||||
ChatCompletionToolCallFunctionChunk,
|
ChatCompletionToolCallFunctionChunk,
|
||||||
ChatCompletionToolParam,
|
ChatCompletionToolParam,
|
||||||
|
@ -116,6 +117,10 @@ class AmazonConverseConfig(BaseConfig):
|
||||||
# only anthropic and mistral support tool choice config. otherwise (E.g. cohere) will fail the call - https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ToolChoice.html
|
# only anthropic and mistral support tool choice config. otherwise (E.g. cohere) will fail the call - https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_ToolChoice.html
|
||||||
supported_params.append("tool_choice")
|
supported_params.append("tool_choice")
|
||||||
|
|
||||||
|
if (
|
||||||
|
"claude-3-7" in model
|
||||||
|
): # [TODO]: move to a 'supports_reasoning_content' param from model cost map
|
||||||
|
supported_params.append("thinking")
|
||||||
return supported_params
|
return supported_params
|
||||||
|
|
||||||
def map_tool_choice_values(
|
def map_tool_choice_values(
|
||||||
|
@ -162,6 +167,7 @@ class AmazonConverseConfig(BaseConfig):
|
||||||
self,
|
self,
|
||||||
json_schema: Optional[dict] = None,
|
json_schema: Optional[dict] = None,
|
||||||
schema_name: str = "json_tool_call",
|
schema_name: str = "json_tool_call",
|
||||||
|
description: Optional[str] = None,
|
||||||
) -> ChatCompletionToolParam:
|
) -> ChatCompletionToolParam:
|
||||||
"""
|
"""
|
||||||
Handles creating a tool call for getting responses in JSON format.
|
Handles creating a tool call for getting responses in JSON format.
|
||||||
|
@ -184,11 +190,15 @@ class AmazonConverseConfig(BaseConfig):
|
||||||
else:
|
else:
|
||||||
_input_schema = json_schema
|
_input_schema = json_schema
|
||||||
|
|
||||||
|
tool_param_function_chunk = ChatCompletionToolParamFunctionChunk(
|
||||||
|
name=schema_name, parameters=_input_schema
|
||||||
|
)
|
||||||
|
if description:
|
||||||
|
tool_param_function_chunk["description"] = description
|
||||||
|
|
||||||
_tool = ChatCompletionToolParam(
|
_tool = ChatCompletionToolParam(
|
||||||
type="function",
|
type="function",
|
||||||
function=ChatCompletionToolParamFunctionChunk(
|
function=tool_param_function_chunk,
|
||||||
name=schema_name, parameters=_input_schema
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
return _tool
|
return _tool
|
||||||
|
|
||||||
|
@ -201,15 +211,26 @@ class AmazonConverseConfig(BaseConfig):
|
||||||
messages: Optional[List[AllMessageValues]] = None,
|
messages: Optional[List[AllMessageValues]] = None,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
for param, value in non_default_params.items():
|
for param, value in non_default_params.items():
|
||||||
if param == "response_format":
|
if param == "response_format" and isinstance(value, dict):
|
||||||
|
|
||||||
|
ignore_response_format_types = ["text"]
|
||||||
|
if value["type"] in ignore_response_format_types: # value is a no-op
|
||||||
|
continue
|
||||||
|
|
||||||
json_schema: Optional[dict] = None
|
json_schema: Optional[dict] = None
|
||||||
schema_name: str = ""
|
schema_name: str = ""
|
||||||
|
description: Optional[str] = None
|
||||||
if "response_schema" in value:
|
if "response_schema" in value:
|
||||||
json_schema = value["response_schema"]
|
json_schema = value["response_schema"]
|
||||||
schema_name = "json_tool_call"
|
schema_name = "json_tool_call"
|
||||||
elif "json_schema" in value:
|
elif "json_schema" in value:
|
||||||
json_schema = value["json_schema"]["schema"]
|
json_schema = value["json_schema"]["schema"]
|
||||||
schema_name = value["json_schema"]["name"]
|
schema_name = value["json_schema"]["name"]
|
||||||
|
description = value["json_schema"].get("description")
|
||||||
|
|
||||||
|
if "type" in value and value["type"] == "text":
|
||||||
|
continue
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Follow similar approach to anthropic - translate to a single tool call.
|
Follow similar approach to anthropic - translate to a single tool call.
|
||||||
|
|
||||||
|
@ -218,12 +239,14 @@ class AmazonConverseConfig(BaseConfig):
|
||||||
- You should set tool_choice (see Forcing tool use) to instruct the model to explicitly use that tool
|
- You should set tool_choice (see Forcing tool use) to instruct the model to explicitly use that tool
|
||||||
- Remember that the model will pass the input to the tool, so the name of the tool and description should be from the model’s perspective.
|
- Remember that the model will pass the input to the tool, so the name of the tool and description should be from the model’s perspective.
|
||||||
"""
|
"""
|
||||||
_tool_choice = {"name": schema_name, "type": "tool"}
|
|
||||||
_tool = self._create_json_tool_call_for_response_format(
|
_tool = self._create_json_tool_call_for_response_format(
|
||||||
json_schema=json_schema,
|
json_schema=json_schema,
|
||||||
schema_name=schema_name if schema_name != "" else "json_tool_call",
|
schema_name=schema_name if schema_name != "" else "json_tool_call",
|
||||||
|
description=description,
|
||||||
|
)
|
||||||
|
optional_params = self._add_tools_to_optional_params(
|
||||||
|
optional_params=optional_params, tools=[_tool]
|
||||||
)
|
)
|
||||||
optional_params["tools"] = [_tool]
|
|
||||||
if litellm.utils.supports_tool_choice(
|
if litellm.utils.supports_tool_choice(
|
||||||
model=model, custom_llm_provider=self.custom_llm_provider
|
model=model, custom_llm_provider=self.custom_llm_provider
|
||||||
):
|
):
|
||||||
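To make the translation above concrete, here is a rough sketch of turning an OpenAI `response_format` value into a single forced JSON tool: the `{"type": "text"}` no-op is skipped, and the schema's description is attached only when present. Field names mirror the diff; the tool dicts and the empty-schema fallback are illustrative assumptions.

```python
from typing import Optional

def response_format_to_tool(value: dict) -> Optional[dict]:
    """Translate an OpenAI response_format dict into a single JSON tool, or None for a no-op."""
    if value.get("type") == "text":
        return None  # plain-text response format needs no tool
    json_schema: Optional[dict] = None
    schema_name = "json_tool_call"
    description: Optional[str] = None
    if "response_schema" in value:
        json_schema = value["response_schema"]
    elif "json_schema" in value:
        json_schema = value["json_schema"]["schema"]
        schema_name = value["json_schema"]["name"]
        description = value["json_schema"].get("description")
    # Fallback schema (assumption) when only `json_object` mode is requested.
    function: dict = {"name": schema_name, "parameters": json_schema or {"type": "object"}}
    if description:
        function["description"] = description
    return {"type": "function", "function": function}

tool = response_format_to_tool(
    {"type": "json_schema", "json_schema": {"name": "invoice", "description": "An invoice", "schema": {"type": "object"}}}
)
print(tool)
assert response_format_to_tool({"type": "text"}) is None
```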
|
@ -250,14 +273,17 @@ class AmazonConverseConfig(BaseConfig):
|
||||||
if param == "top_p":
|
if param == "top_p":
|
||||||
optional_params["topP"] = value
|
optional_params["topP"] = value
|
||||||
if param == "tools":
|
if param == "tools":
|
||||||
optional_params["tools"] = value
|
optional_params = self._add_tools_to_optional_params(
|
||||||
|
optional_params=optional_params, tools=value
|
||||||
|
)
|
||||||
if param == "tool_choice":
|
if param == "tool_choice":
|
||||||
_tool_choice_value = self.map_tool_choice_values(
|
_tool_choice_value = self.map_tool_choice_values(
|
||||||
model=model, tool_choice=value, drop_params=drop_params # type: ignore
|
model=model, tool_choice=value, drop_params=drop_params # type: ignore
|
||||||
)
|
)
|
||||||
if _tool_choice_value is not None:
|
if _tool_choice_value is not None:
|
||||||
optional_params["tool_choice"] = _tool_choice_value
|
optional_params["tool_choice"] = _tool_choice_value
|
||||||
|
if param == "thinking":
|
||||||
|
optional_params["thinking"] = value
|
||||||
return optional_params
|
return optional_params
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
|
@ -545,6 +571,37 @@ class AmazonConverseConfig(BaseConfig):
|
||||||
encoding=encoding,
|
encoding=encoding,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _transform_reasoning_content(
|
||||||
|
self, reasoning_content_blocks: List[BedrockConverseReasoningContentBlock]
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Extract the reasoning text from the reasoning content blocks
|
||||||
|
|
||||||
|
Ensures deepseek reasoning content compatible output.
|
||||||
|
"""
|
||||||
|
reasoning_content_str = ""
|
||||||
|
for block in reasoning_content_blocks:
|
||||||
|
if "reasoningText" in block:
|
||||||
|
reasoning_content_str += block["reasoningText"]["text"]
|
||||||
|
return reasoning_content_str
|
||||||
|
|
||||||
|
def _transform_thinking_blocks(
|
||||||
|
self, thinking_blocks: List[BedrockConverseReasoningContentBlock]
|
||||||
|
) -> List[ChatCompletionThinkingBlock]:
|
||||||
|
"""Return a consistent format for thinking blocks between Anthropic and Bedrock."""
|
||||||
|
thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
|
||||||
|
for block in thinking_blocks:
|
||||||
|
if "reasoningText" in block:
|
||||||
|
_thinking_block = ChatCompletionThinkingBlock(type="thinking")
|
||||||
|
_text = block["reasoningText"].get("text")
|
||||||
|
_signature = block["reasoningText"].get("signature")
|
||||||
|
if _text is not None:
|
||||||
|
_thinking_block["thinking"] = _text
|
||||||
|
if _signature is not None:
|
||||||
|
_thinking_block["signature_delta"] = _signature
|
||||||
|
thinking_blocks_list.append(_thinking_block)
|
||||||
|
return thinking_blocks_list
|
||||||
|
|
||||||
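A standalone sketch of the two reasoning transforms above, operating on plain dicts shaped like Bedrock Converse `reasoningContent` blocks (`{"reasoningText": {"text": ..., "signature": ...}}`). The typed litellm classes are replaced by dicts for illustration.

```python
from typing import Dict, List

ReasoningBlock = Dict[str, dict]

def transform_reasoning_content(blocks: List[ReasoningBlock]) -> str:
    """Flatten Bedrock reasoning blocks into a single DeepSeek-style reasoning string."""
    out = ""
    for block in blocks:
        if "reasoningText" in block:
            out += block["reasoningText"]["text"]
    return out

def transform_thinking_blocks(blocks: List[ReasoningBlock]) -> List[dict]:
    """Convert Bedrock reasoning blocks into Anthropic-style thinking blocks."""
    thinking: List[dict] = []
    for block in blocks:
        if "reasoningText" in block:
            tb: dict = {"type": "thinking"}
            text = block["reasoningText"].get("text")
            signature = block["reasoningText"].get("signature")
            if text is not None:
                tb["thinking"] = text
            if signature is not None:
                tb["signature_delta"] = signature
            thinking.append(tb)
    return thinking

blocks = [{"reasoningText": {"text": "The user wants JSON. ", "signature": "abc123"}}]
print(transform_reasoning_content(blocks))   # "The user wants JSON. "
print(transform_thinking_blocks(blocks))     # [{'type': 'thinking', 'thinking': ..., 'signature_delta': ...}]
```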
def _transform_response(
|
def _transform_response(
|
||||||
self,
|
self,
|
||||||
model: str,
|
model: str,
|
||||||
|
@ -618,6 +675,10 @@ class AmazonConverseConfig(BaseConfig):
|
||||||
chat_completion_message: ChatCompletionResponseMessage = {"role": "assistant"}
|
chat_completion_message: ChatCompletionResponseMessage = {"role": "assistant"}
|
||||||
content_str = ""
|
content_str = ""
|
||||||
tools: List[ChatCompletionToolCallChunk] = []
|
tools: List[ChatCompletionToolCallChunk] = []
|
||||||
|
reasoningContentBlocks: Optional[List[BedrockConverseReasoningContentBlock]] = (
|
||||||
|
None
|
||||||
|
)
|
||||||
|
|
||||||
if message is not None:
|
if message is not None:
|
||||||
for idx, content in enumerate(message["content"]):
|
for idx, content in enumerate(message["content"]):
|
||||||
"""
|
"""
|
||||||
|
@ -644,8 +705,22 @@ class AmazonConverseConfig(BaseConfig):
|
||||||
index=idx,
|
index=idx,
|
||||||
)
|
)
|
||||||
tools.append(_tool_response_chunk)
|
tools.append(_tool_response_chunk)
|
||||||
chat_completion_message["content"] = content_str
|
if "reasoningContent" in content:
|
||||||
|
if reasoningContentBlocks is None:
|
||||||
|
reasoningContentBlocks = []
|
||||||
|
reasoningContentBlocks.append(content["reasoningContent"])
|
||||||
|
|
||||||
|
if reasoningContentBlocks is not None:
|
||||||
|
chat_completion_message["provider_specific_fields"] = {
|
||||||
|
"reasoningContentBlocks": reasoningContentBlocks,
|
||||||
|
}
|
||||||
|
chat_completion_message["reasoning_content"] = (
|
||||||
|
self._transform_reasoning_content(reasoningContentBlocks)
|
||||||
|
)
|
||||||
|
chat_completion_message["thinking_blocks"] = (
|
||||||
|
self._transform_thinking_blocks(reasoningContentBlocks)
|
||||||
|
)
|
||||||
|
chat_completion_message["content"] = content_str
|
||||||
if json_mode is True and tools is not None and len(tools) == 1:
|
if json_mode is True and tools is not None and len(tools) == 1:
|
||||||
# to support 'json_schema' logic on bedrock models
|
# to support 'json_schema' logic on bedrock models
|
||||||
json_mode_content_str: Optional[str] = tools[0]["function"].get("arguments")
|
json_mode_content_str: Optional[str] = tools[0]["function"].get("arguments")
|
||||||
|
|
|
@ -26,7 +26,6 @@ import httpx # type: ignore
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
from litellm import verbose_logger
|
from litellm import verbose_logger
|
||||||
from litellm._logging import print_verbose
|
|
||||||
from litellm.caching.caching import InMemoryCache
|
from litellm.caching.caching import InMemoryCache
|
||||||
from litellm.litellm_core_utils.core_helpers import map_finish_reason
|
from litellm.litellm_core_utils.core_helpers import map_finish_reason
|
||||||
from litellm.litellm_core_utils.litellm_logging import Logging
|
from litellm.litellm_core_utils.litellm_logging import Logging
|
||||||
|
@ -51,13 +50,19 @@ from litellm.llms.custom_httpx.http_handler import (
|
||||||
)
|
)
|
||||||
from litellm.types.llms.bedrock import *
|
from litellm.types.llms.bedrock import *
|
||||||
from litellm.types.llms.openai import (
|
from litellm.types.llms.openai import (
|
||||||
|
ChatCompletionThinkingBlock,
|
||||||
ChatCompletionToolCallChunk,
|
ChatCompletionToolCallChunk,
|
||||||
ChatCompletionToolCallFunctionChunk,
|
ChatCompletionToolCallFunctionChunk,
|
||||||
ChatCompletionUsageBlock,
|
ChatCompletionUsageBlock,
|
||||||
)
|
)
|
||||||
from litellm.types.utils import ChatCompletionMessageToolCall, Choices
|
from litellm.types.utils import ChatCompletionMessageToolCall, Choices, Delta
|
||||||
from litellm.types.utils import GenericStreamingChunk as GChunk
|
from litellm.types.utils import GenericStreamingChunk as GChunk
|
||||||
from litellm.types.utils import ModelResponse, ModelResponseStream, Usage
|
from litellm.types.utils import (
|
||||||
|
ModelResponse,
|
||||||
|
ModelResponseStream,
|
||||||
|
StreamingChoices,
|
||||||
|
Usage,
|
||||||
|
)
|
||||||
from litellm.utils import CustomStreamWrapper, get_secret
|
from litellm.utils import CustomStreamWrapper, get_secret
|
||||||
|
|
||||||
from ..base_aws_llm import BaseAWSLLM
|
from ..base_aws_llm import BaseAWSLLM
|
||||||
|
@ -212,7 +217,6 @@ async def make_call(
|
||||||
api_key="",
|
api_key="",
|
||||||
data=data,
|
data=data,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
print_verbose=print_verbose,
|
|
||||||
encoding=litellm.encoding,
|
encoding=litellm.encoding,
|
||||||
) # type: ignore
|
) # type: ignore
|
||||||
completion_stream: Any = MockResponseIterator(
|
completion_stream: Any = MockResponseIterator(
|
||||||
|
@ -222,6 +226,7 @@ async def make_call(
|
||||||
decoder: AWSEventStreamDecoder = AmazonAnthropicClaudeStreamDecoder(
|
decoder: AWSEventStreamDecoder = AmazonAnthropicClaudeStreamDecoder(
|
||||||
model=model,
|
model=model,
|
||||||
sync_stream=False,
|
sync_stream=False,
|
||||||
|
json_mode=json_mode,
|
||||||
)
|
)
|
||||||
completion_stream = decoder.aiter_bytes(
|
completion_stream = decoder.aiter_bytes(
|
||||||
response.aiter_bytes(chunk_size=1024)
|
response.aiter_bytes(chunk_size=1024)
|
||||||
|
@ -298,7 +303,6 @@ def make_sync_call(
|
||||||
api_key="",
|
api_key="",
|
||||||
data=data,
|
data=data,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
print_verbose=print_verbose,
|
|
||||||
encoding=litellm.encoding,
|
encoding=litellm.encoding,
|
||||||
) # type: ignore
|
) # type: ignore
|
||||||
completion_stream: Any = MockResponseIterator(
|
completion_stream: Any = MockResponseIterator(
|
||||||
|
@ -308,6 +312,7 @@ def make_sync_call(
|
||||||
decoder: AWSEventStreamDecoder = AmazonAnthropicClaudeStreamDecoder(
|
decoder: AWSEventStreamDecoder = AmazonAnthropicClaudeStreamDecoder(
|
||||||
model=model,
|
model=model,
|
||||||
sync_stream=True,
|
sync_stream=True,
|
||||||
|
json_mode=json_mode,
|
||||||
)
|
)
|
||||||
completion_stream = decoder.iter_bytes(response.iter_bytes(chunk_size=1024))
|
completion_stream = decoder.iter_bytes(response.iter_bytes(chunk_size=1024))
|
||||||
elif bedrock_invoke_provider == "deepseek_r1":
|
elif bedrock_invoke_provider == "deepseek_r1":
|
||||||
|
@ -525,7 +530,7 @@ class BedrockLLM(BaseAWSLLM):
|
||||||
].message.tool_calls:
|
].message.tool_calls:
|
||||||
_tool_call = {**tool_call.dict(), "index": 0}
|
_tool_call = {**tool_call.dict(), "index": 0}
|
||||||
_tool_calls.append(_tool_call)
|
_tool_calls.append(_tool_call)
|
||||||
delta_obj = litellm.utils.Delta(
|
delta_obj = Delta(
|
||||||
content=getattr(
|
content=getattr(
|
||||||
model_response.choices[0].message, "content", None
|
model_response.choices[0].message, "content", None
|
||||||
),
|
),
|
||||||
|
@ -1146,27 +1151,6 @@ class BedrockLLM(BaseAWSLLM):
|
||||||
)
|
)
|
||||||
return streaming_response
|
return streaming_response
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def get_bedrock_invoke_provider(
|
|
||||||
model: str,
|
|
||||||
) -> Optional[litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL]:
|
|
||||||
"""
|
|
||||||
Helper function to get the bedrock provider from the model
|
|
||||||
|
|
||||||
handles 2 scenarions:
|
|
||||||
1. model=anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic`
|
|
||||||
2. model=llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n -> Returns `llama`
|
|
||||||
"""
|
|
||||||
_split_model = model.split(".")[0]
|
|
||||||
if _split_model in get_args(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL):
|
|
||||||
return cast(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL, _split_model)
|
|
||||||
|
|
||||||
# If not a known provider, check for pattern with two slashes
|
|
||||||
provider = BedrockLLM._get_provider_from_model_path(model)
|
|
||||||
if provider is not None:
|
|
||||||
return provider
|
|
||||||
return None
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_provider_from_model_path(
|
def _get_provider_from_model_path(
|
||||||
model_path: str,
|
model_path: str,
|
||||||
|
@ -1258,14 +1242,37 @@ class AWSEventStreamDecoder:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def converse_chunk_parser(self, chunk_data: dict) -> GChunk:
|
def extract_reasoning_content_str(
|
||||||
|
self, reasoning_content_block: BedrockConverseReasoningContentBlockDelta
|
||||||
|
) -> Optional[str]:
|
||||||
|
if "text" in reasoning_content_block:
|
||||||
|
return reasoning_content_block["text"]
|
||||||
|
return None
|
||||||
|
|
||||||
|
def translate_thinking_blocks(
|
||||||
|
self, thinking_block: BedrockConverseReasoningContentBlockDelta
|
||||||
|
) -> Optional[List[ChatCompletionThinkingBlock]]:
|
||||||
|
"""
|
||||||
|
Translate a Bedrock reasoning content delta into a list of thinking blocks
|
||||||
|
"""
|
||||||
|
|
||||||
|
thinking_blocks_list: List[ChatCompletionThinkingBlock] = []
|
||||||
|
_thinking_block = ChatCompletionThinkingBlock(type="thinking")
|
||||||
|
if "text" in thinking_block:
|
||||||
|
_thinking_block["thinking"] = thinking_block["text"]
|
||||||
|
thinking_blocks_list.append(_thinking_block)
|
||||||
|
return thinking_blocks_list
|
||||||
|
|
||||||
|
def converse_chunk_parser(self, chunk_data: dict) -> ModelResponseStream:
|
||||||
try:
|
try:
|
||||||
verbose_logger.debug("\n\nRaw Chunk: {}\n\n".format(chunk_data))
|
verbose_logger.debug("\n\nRaw Chunk: {}\n\n".format(chunk_data))
|
||||||
text = ""
|
text = ""
|
||||||
tool_use: Optional[ChatCompletionToolCallChunk] = None
|
tool_use: Optional[ChatCompletionToolCallChunk] = None
|
||||||
is_finished = False
|
|
||||||
finish_reason = ""
|
finish_reason = ""
|
||||||
usage: Optional[ChatCompletionUsageBlock] = None
|
usage: Optional[ChatCompletionUsageBlock] = None
|
||||||
|
provider_specific_fields: dict = {}
|
||||||
|
reasoning_content: Optional[str] = None
|
||||||
|
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
|
||||||
|
|
||||||
index = int(chunk_data.get("contentBlockIndex", 0))
|
index = int(chunk_data.get("contentBlockIndex", 0))
|
||||||
if "start" in chunk_data:
|
if "start" in chunk_data:
|
||||||
|
@ -1305,6 +1312,16 @@ class AWSEventStreamDecoder:
|
||||||
},
|
},
|
||||||
"index": index,
|
"index": index,
|
||||||
}
|
}
|
||||||
|
elif "reasoningContent" in delta_obj:
|
||||||
|
provider_specific_fields = {
|
||||||
|
"reasoningContent": delta_obj["reasoningContent"],
|
||||||
|
}
|
||||||
|
reasoning_content = self.extract_reasoning_content_str(
|
||||||
|
delta_obj["reasoningContent"]
|
||||||
|
)
|
||||||
|
thinking_blocks = self.translate_thinking_blocks(
|
||||||
|
delta_obj["reasoningContent"]
|
||||||
|
)
|
||||||
elif (
|
elif (
|
||||||
"contentBlockIndex" in chunk_data
|
"contentBlockIndex" in chunk_data
|
||||||
): # stop block, no 'start' or 'delta' object
|
): # stop block, no 'start' or 'delta' object
|
||||||
|
@ -1321,7 +1338,6 @@ class AWSEventStreamDecoder:
|
||||||
}
|
}
|
||||||
elif "stopReason" in chunk_data:
|
elif "stopReason" in chunk_data:
|
||||||
finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop"))
|
finish_reason = map_finish_reason(chunk_data.get("stopReason", "stop"))
|
||||||
is_finished = True
|
|
||||||
elif "usage" in chunk_data:
|
elif "usage" in chunk_data:
|
||||||
usage = ChatCompletionUsageBlock(
|
usage = ChatCompletionUsageBlock(
|
||||||
prompt_tokens=chunk_data.get("inputTokens", 0),
|
prompt_tokens=chunk_data.get("inputTokens", 0),
|
||||||
|
@ -1329,18 +1345,33 @@ class AWSEventStreamDecoder:
|
||||||
total_tokens=chunk_data.get("totalTokens", 0),
|
total_tokens=chunk_data.get("totalTokens", 0),
|
||||||
)
|
)
|
||||||
|
|
||||||
response = GChunk(
|
model_response_provider_specific_fields = {}
|
||||||
text=text,
|
|
||||||
tool_use=tool_use,
|
|
||||||
is_finished=is_finished,
|
|
||||||
finish_reason=finish_reason,
|
|
||||||
usage=usage,
|
|
||||||
index=index,
|
|
||||||
)
|
|
||||||
|
|
||||||
if "trace" in chunk_data:
|
if "trace" in chunk_data:
|
||||||
trace = chunk_data.get("trace")
|
trace = chunk_data.get("trace")
|
||||||
response["provider_specific_fields"] = {"trace": trace}
|
model_response_provider_specific_fields["trace"] = trace
|
||||||
|
response = ModelResponseStream(
|
||||||
|
choices=[
|
||||||
|
StreamingChoices(
|
||||||
|
finish_reason=finish_reason,
|
||||||
|
index=index,
|
||||||
|
delta=Delta(
|
||||||
|
content=text,
|
||||||
|
role="assistant",
|
||||||
|
tool_calls=[tool_use] if tool_use else None,
|
||||||
|
provider_specific_fields=(
|
||||||
|
provider_specific_fields
|
||||||
|
if provider_specific_fields
|
||||||
|
else None
|
||||||
|
),
|
||||||
|
thinking_blocks=thinking_blocks,
|
||||||
|
reasoning_content=reasoning_content,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
],
|
||||||
|
usage=usage,
|
||||||
|
provider_specific_fields=model_response_provider_specific_fields,
|
||||||
|
)
|
||||||
|
|
||||||
return response
|
return response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise Exception("Received streaming error - {}".format(str(e)))
|
raise Exception("Received streaming error - {}".format(str(e)))
|
||||||
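For the streaming path, the per-delta handling above reduces to the small sketch below: a `reasoningContent` delta of shape `{"text": ...}` yields both a flat `reasoning_content` string and a one-element thinking-block list, which then ride on the streaming delta. Plain dicts again stand in for litellm's typed chunks.

```python
from typing import List, Optional, Tuple

def parse_reasoning_delta(reasoning_delta: dict) -> Tuple[Optional[str], List[dict]]:
    """Extract a reasoning string and Anthropic-style thinking blocks from a Converse delta."""
    reasoning_content = reasoning_delta.get("text")
    block: dict = {"type": "thinking"}
    if "text" in reasoning_delta:
        block["thinking"] = reasoning_delta["text"]
    return reasoning_content, [block]

reasoning, blocks = parse_reasoning_delta({"text": "Comparing the two options..."})
streaming_delta = {
    "content": "",
    "reasoning_content": reasoning,
    "thinking_blocks": blocks,
    "provider_specific_fields": {"reasoningContent": {"text": reasoning}},
}
print(streaming_delta)
```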
|
@ -1474,6 +1505,7 @@ class AmazonAnthropicClaudeStreamDecoder(AWSEventStreamDecoder):
|
||||||
self,
|
self,
|
||||||
model: str,
|
model: str,
|
||||||
sync_stream: bool,
|
sync_stream: bool,
|
||||||
|
json_mode: Optional[bool] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Child class of AWSEventStreamDecoder that handles the streaming response from the Anthropic family of models
|
Child class of AWSEventStreamDecoder that handles the streaming response from the Anthropic family of models
|
||||||
|
@ -1484,9 +1516,10 @@ class AmazonAnthropicClaudeStreamDecoder(AWSEventStreamDecoder):
|
||||||
self.anthropic_model_response_iterator = AnthropicModelResponseIterator(
|
self.anthropic_model_response_iterator = AnthropicModelResponseIterator(
|
||||||
streaming_response=None,
|
streaming_response=None,
|
||||||
sync_stream=sync_stream,
|
sync_stream=sync_stream,
|
||||||
|
json_mode=json_mode,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _chunk_parser(self, chunk_data: dict) -> GChunk:
|
def _chunk_parser(self, chunk_data: dict) -> ModelResponseStream:
|
||||||
return self.anthropic_model_response_iterator.chunk_parser(chunk=chunk_data)
|
return self.anthropic_model_response_iterator.chunk_parser(chunk=chunk_data)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -3,8 +3,10 @@ from typing import Optional
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
|
|
||||||
|
from .base_invoke_transformation import AmazonInvokeConfig
|
||||||
|
|
||||||
class AmazonAnthropicConfig:
|
|
||||||
|
class AmazonAnthropicConfig(AmazonInvokeConfig):
|
||||||
"""
|
"""
|
||||||
Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude
|
Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude
|
||||||
|
|
||||||
|
@ -57,9 +59,7 @@ class AmazonAnthropicConfig:
|
||||||
and v is not None
|
and v is not None
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_supported_openai_params(
|
def get_supported_openai_params(self, model: str):
|
||||||
self,
|
|
||||||
):
|
|
||||||
return [
|
return [
|
||||||
"max_tokens",
|
"max_tokens",
|
||||||
"max_completion_tokens",
|
"max_completion_tokens",
|
||||||
|
@ -69,7 +69,13 @@ class AmazonAnthropicConfig:
|
||||||
"stream",
|
"stream",
|
||||||
]
|
]
|
||||||
|
|
||||||
def map_openai_params(self, non_default_params: dict, optional_params: dict):
|
def map_openai_params(
|
||||||
|
self,
|
||||||
|
non_default_params: dict,
|
||||||
|
optional_params: dict,
|
||||||
|
model: str,
|
||||||
|
drop_params: bool,
|
||||||
|
):
|
||||||
for param, value in non_default_params.items():
|
for param, value in non_default_params.items():
|
||||||
if param == "max_tokens" or param == "max_completion_tokens":
|
if param == "max_tokens" or param == "max_completion_tokens":
|
||||||
optional_params["max_tokens_to_sample"] = value
|
optional_params["max_tokens_to_sample"] = value
|
||||||
|
|
|
@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, Any, List, Optional
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
import litellm
|
from litellm.llms.anthropic.chat.transformation import AnthropicConfig
|
||||||
from litellm.llms.bedrock.chat.invoke_transformations.base_invoke_transformation import (
|
from litellm.llms.bedrock.chat.invoke_transformations.base_invoke_transformation import (
|
||||||
AmazonInvokeConfig,
|
AmazonInvokeConfig,
|
||||||
)
|
)
|
||||||
|
@ -17,7 +17,7 @@ else:
|
||||||
LiteLLMLoggingObj = Any
|
LiteLLMLoggingObj = Any
|
||||||
|
|
||||||
|
|
||||||
class AmazonAnthropicClaude3Config(AmazonInvokeConfig):
|
class AmazonAnthropicClaude3Config(AmazonInvokeConfig, AnthropicConfig):
|
||||||
"""
|
"""
|
||||||
Reference:
|
Reference:
|
||||||
https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude
|
https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude
|
||||||
|
@ -28,18 +28,8 @@ class AmazonAnthropicClaude3Config(AmazonInvokeConfig):
|
||||||
|
|
||||||
anthropic_version: str = "bedrock-2023-05-31"
|
anthropic_version: str = "bedrock-2023-05-31"
|
||||||
|
|
||||||
def get_supported_openai_params(self, model: str):
|
def get_supported_openai_params(self, model: str) -> List[str]:
|
||||||
return [
|
return AnthropicConfig.get_supported_openai_params(self, model)
|
||||||
"max_tokens",
|
|
||||||
"max_completion_tokens",
|
|
||||||
"tools",
|
|
||||||
"tool_choice",
|
|
||||||
"stream",
|
|
||||||
"stop",
|
|
||||||
"temperature",
|
|
||||||
"top_p",
|
|
||||||
"extra_headers",
|
|
||||||
]
|
|
||||||
|
|
||||||
def map_openai_params(
|
def map_openai_params(
|
||||||
self,
|
self,
|
||||||
|
@ -47,21 +37,14 @@ class AmazonAnthropicClaude3Config(AmazonInvokeConfig):
|
||||||
optional_params: dict,
|
optional_params: dict,
|
||||||
model: str,
|
model: str,
|
||||||
drop_params: bool,
|
drop_params: bool,
|
||||||
):
|
) -> dict:
|
||||||
for param, value in non_default_params.items():
|
return AnthropicConfig.map_openai_params(
|
||||||
if param == "max_tokens" or param == "max_completion_tokens":
|
self,
|
||||||
optional_params["max_tokens"] = value
|
non_default_params,
|
||||||
if param == "tools":
|
optional_params,
|
||||||
optional_params["tools"] = value
|
model,
|
||||||
if param == "stream":
|
drop_params,
|
||||||
optional_params["stream"] = value
|
)
|
||||||
if param == "stop":
|
|
||||||
optional_params["stop_sequences"] = value
|
|
||||||
if param == "temperature":
|
|
||||||
optional_params["temperature"] = value
|
|
||||||
if param == "top_p":
|
|
||||||
optional_params["top_p"] = value
|
|
||||||
return optional_params
|
|
||||||
|
|
||||||
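The refactor above leans on a standard Python pattern: with `class AmazonAnthropicClaude3Config(AmazonInvokeConfig, AnthropicConfig)`, shared behaviour is reused by calling the parent's method explicitly, e.g. `AnthropicConfig.map_openai_params(self, ...)`, instead of re-implementing the mapping. A generic, self-contained illustration of that pattern (class and method names here are made up for the example):

```python
class InvokeTransport:
    """Stands in for the invoke-specific base (signing, URLs, ...)."""
    def endpoint(self) -> str:
        return "bedrock-invoke"

class AnthropicMapping:
    """Stands in for the Anthropic parameter-mapping base."""
    def map_openai_params(self, non_default_params: dict, optional_params: dict) -> dict:
        if "max_tokens" in non_default_params:
            optional_params["max_tokens"] = non_default_params["max_tokens"]
        return optional_params

class BedrockAnthropic(InvokeTransport, AnthropicMapping):
    def map_openai_params(self, non_default_params: dict, optional_params: dict) -> dict:
        # Delegate explicitly to the mapping parent rather than duplicating its logic.
        return AnthropicMapping.map_openai_params(self, non_default_params, optional_params)

cfg = BedrockAnthropic()
print(cfg.endpoint())                                  # behaviour from InvokeTransport
print(cfg.map_openai_params({"max_tokens": 256}, {}))  # behaviour from AnthropicMapping
```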
def transform_request(
|
def transform_request(
|
||||||
self,
|
self,
|
||||||
|
@ -71,7 +54,8 @@ class AmazonAnthropicClaude3Config(AmazonInvokeConfig):
|
||||||
litellm_params: dict,
|
litellm_params: dict,
|
||||||
headers: dict,
|
headers: dict,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
_anthropic_request = litellm.AnthropicConfig().transform_request(
|
_anthropic_request = AnthropicConfig.transform_request(
|
||||||
|
self,
|
||||||
model=model,
|
model=model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
optional_params=optional_params,
|
optional_params=optional_params,
|
||||||
|
@ -80,6 +64,7 @@ class AmazonAnthropicClaude3Config(AmazonInvokeConfig):
|
||||||
)
|
)
|
||||||
|
|
||||||
_anthropic_request.pop("model", None)
|
_anthropic_request.pop("model", None)
|
||||||
|
_anthropic_request.pop("stream", None)
|
||||||
if "anthropic_version" not in _anthropic_request:
|
if "anthropic_version" not in _anthropic_request:
|
||||||
_anthropic_request["anthropic_version"] = self.anthropic_version
|
_anthropic_request["anthropic_version"] = self.anthropic_version
|
||||||
|
|
||||||
|
@ -99,7 +84,8 @@ class AmazonAnthropicClaude3Config(AmazonInvokeConfig):
|
||||||
api_key: Optional[str] = None,
|
api_key: Optional[str] = None,
|
||||||
json_mode: Optional[bool] = None,
|
json_mode: Optional[bool] = None,
|
||||||
) -> ModelResponse:
|
) -> ModelResponse:
|
||||||
return litellm.AnthropicConfig().transform_response(
|
return AnthropicConfig.transform_response(
|
||||||
|
self,
|
||||||
model=model,
|
model=model,
|
||||||
raw_response=raw_response,
|
raw_response=raw_response,
|
||||||
model_response=model_response,
|
model_response=model_response,
|
||||||
|
|
|
@ -73,7 +73,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
|
||||||
|
|
||||||
def get_complete_url(
|
def get_complete_url(
|
||||||
self,
|
self,
|
||||||
api_base: str,
|
api_base: Optional[str],
|
||||||
model: str,
|
model: str,
|
||||||
optional_params: dict,
|
optional_params: dict,
|
||||||
stream: Optional[bool] = None,
|
stream: Optional[bool] = None,
|
||||||
|
@ -461,6 +461,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
|
||||||
data: dict,
|
data: dict,
|
||||||
messages: list,
|
messages: list,
|
||||||
client: Optional[AsyncHTTPHandler] = None,
|
client: Optional[AsyncHTTPHandler] = None,
|
||||||
|
json_mode: Optional[bool] = None,
|
||||||
) -> CustomStreamWrapper:
|
) -> CustomStreamWrapper:
|
||||||
streaming_response = CustomStreamWrapper(
|
streaming_response = CustomStreamWrapper(
|
||||||
completion_stream=None,
|
completion_stream=None,
|
||||||
|
@ -475,6 +476,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
|
||||||
logging_obj=logging_obj,
|
logging_obj=logging_obj,
|
||||||
fake_stream=True if "ai21" in api_base else False,
|
fake_stream=True if "ai21" in api_base else False,
|
||||||
bedrock_invoke_provider=self.get_bedrock_invoke_provider(model),
|
bedrock_invoke_provider=self.get_bedrock_invoke_provider(model),
|
||||||
|
json_mode=json_mode,
|
||||||
),
|
),
|
||||||
model=model,
|
model=model,
|
||||||
custom_llm_provider="bedrock",
|
custom_llm_provider="bedrock",
|
||||||
|
@ -493,6 +495,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
|
||||||
data: dict,
|
data: dict,
|
||||||
messages: list,
|
messages: list,
|
||||||
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
|
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
|
||||||
|
json_mode: Optional[bool] = None,
|
||||||
) -> CustomStreamWrapper:
|
) -> CustomStreamWrapper:
|
||||||
if client is None or isinstance(client, AsyncHTTPHandler):
|
if client is None or isinstance(client, AsyncHTTPHandler):
|
||||||
client = _get_httpx_client(params={})
|
client = _get_httpx_client(params={})
|
||||||
|
@ -509,6 +512,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
|
||||||
logging_obj=logging_obj,
|
logging_obj=logging_obj,
|
||||||
fake_stream=True if "ai21" in api_base else False,
|
fake_stream=True if "ai21" in api_base else False,
|
||||||
bedrock_invoke_provider=self.get_bedrock_invoke_provider(model),
|
bedrock_invoke_provider=self.get_bedrock_invoke_provider(model),
|
||||||
|
json_mode=json_mode,
|
||||||
),
|
),
|
||||||
model=model,
|
model=model,
|
||||||
custom_llm_provider="bedrock",
|
custom_llm_provider="bedrock",
|
||||||
|
@ -534,7 +538,7 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
|
||||||
"""
|
"""
|
||||||
Helper function to get the bedrock provider from the model
|
Helper function to get the bedrock provider from the model
|
||||||
|
|
||||||
handles 3 scenarions:
|
handles 4 scenarios:
|
||||||
1. model=invoke/anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic`
|
1. model=invoke/anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic`
|
||||||
2. model=anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic`
|
2. model=anthropic.claude-3-5-sonnet-20240620-v1:0 -> Returns `anthropic`
|
||||||
3. model=llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n -> Returns `llama`
|
3. model=llama/arn:aws:bedrock:us-east-1:086734376398:imported-model/r4c4kewx2s0n -> Returns `llama`
|
||||||
|
@ -555,6 +559,10 @@ class AmazonInvokeConfig(BaseConfig, BaseAWSLLM):
|
||||||
# check if provider == "nova"
|
# check if provider == "nova"
|
||||||
if "nova" in model:
|
if "nova" in model:
|
||||||
return "nova"
|
return "nova"
|
||||||
|
|
||||||
|
for provider in get_args(litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL):
|
||||||
|
if provider in model:
|
||||||
|
return provider
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
|
@ -11,6 +11,7 @@ from litellm.llms.base_llm.chat.transformation import (
|
||||||
BaseLLMException,
|
BaseLLMException,
|
||||||
LiteLLMLoggingObj,
|
LiteLLMLoggingObj,
|
||||||
)
|
)
|
||||||
|
from litellm.secret_managers.main import get_secret_str
|
||||||
from litellm.types.llms.openai import AllMessageValues
|
from litellm.types.llms.openai import AllMessageValues
|
||||||
from litellm.types.utils import (
|
from litellm.types.utils import (
|
||||||
ChatCompletionToolCallChunk,
|
ChatCompletionToolCallChunk,
|
||||||
|
@ -75,11 +76,16 @@ class CloudflareChatConfig(BaseConfig):
|
||||||
|
|
||||||
def get_complete_url(
|
def get_complete_url(
|
||||||
self,
|
self,
|
||||||
api_base: str,
|
api_base: Optional[str],
|
||||||
model: str,
|
model: str,
|
||||||
optional_params: dict,
|
optional_params: dict,
|
||||||
stream: Optional[bool] = None,
|
stream: Optional[bool] = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
|
if api_base is None:
|
||||||
|
account_id = get_secret_str("CLOUDFLARE_ACCOUNT_ID")
|
||||||
|
api_base = (
|
||||||
|
f"https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/"
|
||||||
|
)
|
||||||
return api_base + model
|
return api_base + model
|
||||||
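The fallback above can be sketched without litellm: when no `api_base` is supplied, the Workers AI URL is assembled from the account id (read here from an environment variable as a stand-in for litellm's secret manager) and the model name is appended.

```python
import os
from typing import Optional

def cloudflare_complete_url(api_base: Optional[str], model: str) -> str:
    """Build the Cloudflare Workers AI run URL, deriving the default base from the account id."""
    if api_base is None:
        account_id = os.environ.get("CLOUDFLARE_ACCOUNT_ID", "<account-id>")
        api_base = f"https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/"
    return api_base + model

# Illustrative model name; any Workers AI model id appended the same way.
print(cloudflare_complete_url(None, "@cf/meta/llama-3-8b-instruct"))
```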
|
|
||||||
def get_supported_openai_params(self, model: str) -> List[str]:
|
def get_supported_openai_params(self, model: str) -> List[str]:
|
||||||
|
|
|
@@ -159,6 +159,7 @@ class BaseLLMHTTPHandler:
         encoding: Any,
         api_key: Optional[str] = None,
         client: Optional[AsyncHTTPHandler] = None,
+        json_mode: bool = False,
     ):
         if client is None:
             async_httpx_client = get_async_httpx_client(
@@ -190,6 +191,7 @@ class BaseLLMHTTPHandler:
             optional_params=optional_params,
             litellm_params=litellm_params,
             encoding=encoding,
+            json_mode=json_mode,
         )

     def completion(
@@ -211,6 +213,7 @@ class BaseLLMHTTPHandler:
         headers: Optional[dict] = {},
         client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
     ):
+        json_mode: bool = optional_params.pop("json_mode", False)

         provider_config = ProviderConfigManager.get_provider_chat_config(
             model=model, provider=litellm.LlmProviders(custom_llm_provider)
@@ -286,6 +289,7 @@ class BaseLLMHTTPHandler:
                     else None
                 ),
                 litellm_params=litellm_params,
+                json_mode=json_mode,
             )

         else:
@@ -309,6 +313,7 @@ class BaseLLMHTTPHandler:
                     if client is not None and isinstance(client, AsyncHTTPHandler)
                     else None
                 ),
+                json_mode=json_mode,
             )

         if stream is True:
@@ -327,6 +332,7 @@ class BaseLLMHTTPHandler:
                     data=data,
                     messages=messages,
                     client=client,
+                    json_mode=json_mode,
                 )
             completion_stream, headers = self.make_sync_call(
                 provider_config=provider_config,
@@ -380,6 +386,7 @@ class BaseLLMHTTPHandler:
             optional_params=optional_params,
             litellm_params=litellm_params,
             encoding=encoding,
+            json_mode=json_mode,
         )

     def make_sync_call(
@@ -453,6 +460,7 @@ class BaseLLMHTTPHandler:
         litellm_params: dict,
         fake_stream: bool = False,
         client: Optional[AsyncHTTPHandler] = None,
+        json_mode: Optional[bool] = None,
     ):
         if provider_config.has_custom_stream_wrapper is True:
             return provider_config.get_async_custom_stream_wrapper(
@@ -464,6 +472,7 @@ class BaseLLMHTTPHandler:
                 data=data,
                 messages=messages,
                 client=client,
+                json_mode=json_mode,
             )

         completion_stream, _response_headers = await self.make_async_call_stream_helper(
@@ -720,7 +729,7 @@ class BaseLLMHTTPHandler:
         api_base: Optional[str] = None,
         client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
     ) -> RerankResponse:

         # get config from model, custom llm provider
         headers = provider_config.validate_environment(
             api_key=api_key,
@@ -34,3 +34,21 @@ class DeepSeekChatConfig(OpenAIGPTConfig):
         )  # type: ignore
         dynamic_api_key = api_key or get_secret_str("DEEPSEEK_API_KEY")
         return api_base, dynamic_api_key
+
+    def get_complete_url(
+        self,
+        api_base: Optional[str],
+        model: str,
+        optional_params: dict,
+        stream: Optional[bool] = None,
+    ) -> str:
+        """
+        If api_base is not provided, use the default DeepSeek /chat/completions endpoint.
+        """
+        if not api_base:
+            api_base = "https://api.deepseek.com/beta"
+
+        if not api_base.endswith("/chat/completions"):
+            api_base = f"{api_base}/chat/completions"
+
+        return api_base
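A small, self-contained sketch of the URL normalization introduced above; the default base and the `/chat/completions` suffix handling are taken directly from the hunk:

```python
from typing import Optional


def deepseek_complete_url(api_base: Optional[str]) -> str:
    # Mirrors DeepSeekChatConfig.get_complete_url from the diff above.
    if not api_base:
        api_base = "https://api.deepseek.com/beta"
    if not api_base.endswith("/chat/completions"):
        api_base = f"{api_base}/chat/completions"
    return api_base


assert deepseek_complete_url(None) == "https://api.deepseek.com/beta/chat/completions"
assert deepseek_complete_url("https://api.deepseek.com") == "https://api.deepseek.com/chat/completions"
```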
@@ -90,6 +90,11 @@ class FireworksAIConfig(OpenAIGPTConfig):
     ) -> dict:

         supported_openai_params = self.get_supported_openai_params(model=model)
+        is_tools_set = any(
+            param == "tools" and value is not None
+            for param, value in non_default_params.items()
+        )
+
         for param, value in non_default_params.items():
             if param == "tool_choice":
                 if value == "required":
@@ -98,18 +103,30 @@ class FireworksAIConfig(OpenAIGPTConfig):
                 else:
                     # pass through the value of tool choice
                     optional_params["tool_choice"] = value
-            elif (
-                param == "response_format" and value.get("type", None) == "json_schema"
-            ):
-                optional_params["response_format"] = {
-                    "type": "json_object",
-                    "schema": value["json_schema"]["schema"],
-                }
+            elif param == "response_format":
+                if (
+                    is_tools_set
+                ):  # fireworks ai doesn't support tools and response_format together
+                    optional_params = self._add_response_format_to_tools(
+                        optional_params=optional_params,
+                        value=value,
+                        is_response_format_supported=False,
+                        enforce_tool_choice=False,  # tools and response_format are both set, don't enforce tool_choice
+                    )
+                elif "json_schema" in value:
+                    optional_params["response_format"] = {
+                        "type": "json_object",
+                        "schema": value["json_schema"]["schema"],
+                    }
+                else:
+                    optional_params["response_format"] = value
             elif param == "max_completion_tokens":
                 optional_params["max_tokens"] = value
             elif param in supported_openai_params:
                 if value is not None:
                     optional_params[param] = value

         return optional_params

     def _add_transform_inline_image_block(
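For readers mapping the branches above to concrete values, here is a hedged sketch of what a `json_schema`-style `response_format` becomes when no tools are set; the example schema is invented for illustration:

```python
# Example OpenAI-style response_format (invented payload for illustration).
value = {
    "type": "json_schema",
    "json_schema": {"schema": {"type": "object", "properties": {"name": {"type": "string"}}}},
}

optional_params: dict = {}
# Same branch as the diff above when is_tools_set is False:
if "json_schema" in value:
    optional_params["response_format"] = {
        "type": "json_object",
        "schema": value["json_schema"]["schema"],
    }
else:
    optional_params["response_format"] = value

print(optional_params)
# {'response_format': {'type': 'json_object', 'schema': {...}}}
```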
@@ -353,7 +353,7 @@ class OllamaConfig(BaseConfig):

     def get_complete_url(
         self,
-        api_base: str,
+        api_base: Optional[str],
         model: str,
         optional_params: dict,
         stream: Optional[bool] = None,
@@ -365,6 +365,8 @@ class OllamaConfig(BaseConfig):

         Some providers need `model` in `api_base`
         """
+        if api_base is None:
+            api_base = "http://localhost:11434"
         if api_base.endswith("/api/generate"):
             url = api_base
         else:
@@ -508,6 +508,7 @@ async def ollama_async_streaming(
         verbose_logger.exception(
             "LiteLLM.ollama(): Exception occured - {}".format(str(e))
         )
+        raise e


 async def ollama_acompletion(
@@ -263,7 +263,7 @@ class OpenAIGPTConfig(BaseLLMModelInfo, BaseConfig):

     def get_complete_url(
         self,
-        api_base: str,
+        api_base: Optional[str],
         model: str,
         optional_params: dict,
         stream: Optional[bool] = None,
@@ -274,6 +274,8 @@ class OpenAIGPTConfig(BaseLLMModelInfo, BaseConfig):
         Returns:
             str: The complete URL for the API call.
         """
+        if api_base is None:
+            api_base = "https://api.openai.com"
         endpoint = "chat/completions"

         # Remove trailing slash from api_base if present
@@ -19,6 +19,7 @@ from litellm.litellm_core_utils.get_llm_provider_logic import get_llm_provider
 from litellm.types.llms.openai import AllMessageValues, ChatCompletionUserMessage
 from litellm.utils import (
     supports_function_calling,
+    supports_parallel_function_calling,
     supports_response_schema,
     supports_system_messages,
 )
@@ -76,14 +77,19 @@ class OpenAIOSeriesConfig(OpenAIGPTConfig):
             model, custom_llm_provider
         )
         _supports_response_schema = supports_response_schema(model, custom_llm_provider)
+        _supports_parallel_tool_calls = supports_parallel_function_calling(
+            model, custom_llm_provider
+        )

         if not _supports_function_calling:
             non_supported_params.append("tools")
             non_supported_params.append("tool_choice")
-            non_supported_params.append("parallel_tool_calls")
             non_supported_params.append("function_call")
             non_supported_params.append("functions")

+        if not _supports_parallel_tool_calls:
+            non_supported_params.append("parallel_tool_calls")
+
         if not _supports_response_schema:
             non_supported_params.append("response_format")

@@ -112,6 +112,7 @@ class OpenAIAudioTranscription(OpenAIChatCompletion):
             api_base=api_base,
             timeout=timeout,
             max_retries=max_retries,
+            client=client,
         )

         ## LOGGING
@@ -138,7 +138,7 @@ class ReplicateConfig(BaseConfig):

     def get_complete_url(
         self,
-        api_base: str,
+        api_base: Optional[str],
         model: str,
         optional_params: dict,
         stream: Optional[bool] = None,
@@ -433,6 +433,10 @@ class SagemakerLLM(BaseAWSLLM):
                 "messages": messages,
             }
             prepared_request = await asyncified_prepare_request(**prepared_request_args)
+            if model_id is not None:  # Fixes https://github.com/BerriAI/litellm/issues/8889
+                prepared_request.headers.update(
+                    {"X-Amzn-SageMaker-Inference-Component": model_id}
+                )
             completion_stream = await self.make_async_call(
                 api_base=prepared_request.url,
                 headers=prepared_request.headers,  # type: ignore
@@ -511,7 +515,7 @@ class SagemakerLLM(BaseAWSLLM):
                 # Add model_id as InferenceComponentName header
                 # boto3 doc: https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html
                 prepared_request.headers.update(
-                    {"X-Amzn-SageMaker-Inference-Componen": model_id}
+                    {"X-Amzn-SageMaker-Inference-Component": model_id}
                 )
             # make async httpx post request here
             try:
@@ -11,7 +11,7 @@ from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig

 class SambanovaConfig(OpenAIGPTConfig):
     """
-    Reference: https://community.sambanova.ai/t/create-chat-completion-api/
+    Reference: https://docs.sambanova.ai/cloud/api-reference/

     Below are the parameters:
     """
@@ -10,7 +10,10 @@ from litellm.llms.custom_httpx.http_handler import (
 )
 from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM
 from litellm.types.llms.openai import Batch, CreateBatchRequest
-from litellm.types.llms.vertex_ai import VertexAIBatchPredictionJob
+from litellm.types.llms.vertex_ai import (
+    VERTEX_CREDENTIALS_TYPES,
+    VertexAIBatchPredictionJob,
+)

 from .transformation import VertexAIBatchTransformation

@@ -25,7 +28,7 @@ class VertexAIBatchPrediction(VertexLLM):
         _is_async: bool,
         create_batch_data: CreateBatchRequest,
         api_base: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
         timeout: Union[float, httpx.Timeout],
@@ -130,7 +133,7 @@ class VertexAIBatchPrediction(VertexLLM):
         _is_async: bool,
         batch_id: str,
         api_base: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
         timeout: Union[float, httpx.Timeout],
@@ -9,6 +9,7 @@ from litellm.integrations.gcs_bucket.gcs_bucket_base import (
 )
 from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
 from litellm.types.llms.openai import CreateFileRequest, FileObject
+from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES

 from .transformation import VertexAIFilesTransformation

@@ -34,7 +35,7 @@ class VertexAIFilesHandler(GCSBucketBase):
         self,
         create_file_data: CreateFileRequest,
         api_base: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
         timeout: Union[float, httpx.Timeout],
@@ -70,7 +71,7 @@ class VertexAIFilesHandler(GCSBucketBase):
         _is_async: bool,
         create_file_data: CreateFileRequest,
         api_base: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
         timeout: Union[float, httpx.Timeout],
@@ -13,6 +13,7 @@ from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import Ver
 from litellm.types.fine_tuning import OpenAIFineTuningHyperparameters
 from litellm.types.llms.openai import FineTuningJobCreate
 from litellm.types.llms.vertex_ai import (
+    VERTEX_CREDENTIALS_TYPES,
     FineTuneHyperparameters,
     FineTuneJobCreate,
     FineTunesupervisedTuningSpec,
@@ -222,7 +223,7 @@ class VertexFineTuningAPI(VertexLLM):
         create_fine_tuning_job_data: FineTuningJobCreate,
         vertex_project: Optional[str],
         vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         api_base: Optional[str],
         timeout: Union[float, httpx.Timeout],
         kwargs: Optional[dict] = None,
@@ -40,6 +40,7 @@ from litellm.types.llms.openai import (
     ChatCompletionUsageBlock,
 )
 from litellm.types.llms.vertex_ai import (
+    VERTEX_CREDENTIALS_TYPES,
     Candidates,
     ContentType,
     FunctionCallingConfig,
@@ -930,7 +931,7 @@ class VertexLLM(VertexBase):
         client: Optional[AsyncHTTPHandler] = None,
         vertex_project: Optional[str] = None,
         vertex_location: Optional[str] = None,
-        vertex_credentials: Optional[str] = None,
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
         gemini_api_key: Optional[str] = None,
         extra_headers: Optional[dict] = None,
     ) -> CustomStreamWrapper:
@@ -1018,7 +1019,7 @@ class VertexLLM(VertexBase):
         client: Optional[AsyncHTTPHandler] = None,
         vertex_project: Optional[str] = None,
         vertex_location: Optional[str] = None,
-        vertex_credentials: Optional[str] = None,
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
         gemini_api_key: Optional[str] = None,
         extra_headers: Optional[dict] = None,
     ) -> Union[ModelResponse, CustomStreamWrapper]:
@@ -1123,7 +1124,7 @@ class VertexLLM(VertexBase):
         timeout: Optional[Union[float, httpx.Timeout]],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         gemini_api_key: Optional[str],
         litellm_params: dict,
         logger_fn=None,
@@ -11,6 +11,7 @@ from litellm.llms.custom_httpx.http_handler import (
     get_async_httpx_client,
 )
 from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM
+from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES
 from litellm.types.utils import ImageResponse


@@ -44,7 +45,7 @@ class VertexImageGeneration(VertexLLM):
         prompt: str,
         vertex_project: Optional[str],
         vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         model_response: ImageResponse,
         logging_obj: Any,
         model: Optional[
@@ -139,7 +140,7 @@ class VertexImageGeneration(VertexLLM):
         prompt: str,
         vertex_project: Optional[str],
         vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         model_response: litellm.ImageResponse,
         logging_obj: Any,
         model: Optional[
@@ -9,6 +9,7 @@ from litellm.llms.custom_httpx.http_handler import (
 )
 from litellm.llms.openai.openai import HttpxBinaryResponseContent
 from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM
+from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES


 class VertexInput(TypedDict, total=False):
@@ -45,7 +46,7 @@ class VertexTextToSpeechAPI(VertexLLM):
         logging_obj,
         vertex_project: Optional[str],
         vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         api_base: Optional[str],
         timeout: Union[float, httpx.Timeout],
         model: str,
@@ -160,7 +160,8 @@ class VertexAIPartnerModels(VertexBase):
                 url=default_api_base,
             )

-        model = model.split("@")[0]
+        if "codestral" in model or "mistral" in model:
+            model = model.split("@")[0]

         if "codestral" in model and litellm_params.get("text_completion") is True:
             optional_params["model"] = model
@@ -41,7 +41,7 @@ class VertexEmbedding(VertexBase):
         client: Optional[Union[AsyncHTTPHandler, HTTPHandler]] = None,
         vertex_project: Optional[str] = None,
         vertex_location: Optional[str] = None,
-        vertex_credentials: Optional[str] = None,
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
         gemini_api_key: Optional[str] = None,
         extra_headers: Optional[dict] = None,
     ) -> EmbeddingResponse:
@@ -148,7 +148,7 @@ class VertexEmbedding(VertexBase):
         client: Optional[AsyncHTTPHandler] = None,
         vertex_project: Optional[str] = None,
         vertex_location: Optional[str] = None,
-        vertex_credentials: Optional[str] = None,
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES] = None,
         gemini_api_key: Optional[str] = None,
         extra_headers: Optional[dict] = None,
         encoding=None,
@@ -12,6 +12,7 @@ from litellm._logging import verbose_logger
 from litellm.litellm_core_utils.asyncify import asyncify
 from litellm.llms.base import BaseLLM
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+from litellm.types.llms.vertex_ai import VERTEX_CREDENTIALS_TYPES

 from .common_utils import _get_gemini_url, _get_vertex_url, all_gemini_url_modes

@@ -34,7 +35,7 @@ class VertexBase(BaseLLM):
         return vertex_region or "us-central1"

     def load_auth(
-        self, credentials: Optional[str], project_id: Optional[str]
+        self, credentials: Optional[VERTEX_CREDENTIALS_TYPES], project_id: Optional[str]
     ) -> Tuple[Any, str]:
         import google.auth as google_auth
         from google.auth import identity_pool
@@ -42,29 +43,36 @@ class VertexBase(BaseLLM):
             Request,  # type: ignore[import-untyped]
         )

-        if credentials is not None and isinstance(credentials, str):
+        if credentials is not None:
             import google.oauth2.service_account

-            verbose_logger.debug(
-                "Vertex: Loading vertex credentials from %s", credentials
-            )
-            verbose_logger.debug(
-                "Vertex: checking if credentials is a valid path, os.path.exists(%s)=%s, current dir %s",
-                credentials,
-                os.path.exists(credentials),
-                os.getcwd(),
-            )
+            if isinstance(credentials, str):
+                verbose_logger.debug(
+                    "Vertex: Loading vertex credentials from %s", credentials
+                )
+                verbose_logger.debug(
+                    "Vertex: checking if credentials is a valid path, os.path.exists(%s)=%s, current dir %s",
+                    credentials,
+                    os.path.exists(credentials),
+                    os.getcwd(),
+                )

-            try:
-                if os.path.exists(credentials):
-                    json_obj = json.load(open(credentials))
-                else:
-                    json_obj = json.loads(credentials)
-            except Exception:
-                raise Exception(
-                    "Unable to load vertex credentials from environment. Got={}".format(
-                        credentials
+                try:
+                    if os.path.exists(credentials):
+                        json_obj = json.load(open(credentials))
+                    else:
+                        json_obj = json.loads(credentials)
+                except Exception:
+                    raise Exception(
+                        "Unable to load vertex credentials from environment. Got={}".format(
+                            credentials
+                        )
                     )
-                )
+            elif isinstance(credentials, dict):
+                json_obj = credentials
+            else:
+                raise ValueError(
+                    "Invalid credentials type: {}".format(type(credentials))
+                )

         # Check if the JSON object contains Workload Identity Federation configuration
@@ -109,7 +117,7 @@ class VertexBase(BaseLLM):

     def _ensure_access_token(
         self,
-        credentials: Optional[str],
+        credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         project_id: Optional[str],
         custom_llm_provider: Literal[
             "vertex_ai", "vertex_ai_beta", "gemini"
@@ -202,7 +210,7 @@ class VertexBase(BaseLLM):
         gemini_api_key: Optional[str],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
-        vertex_credentials: Optional[str],
+        vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         stream: Optional[bool],
         custom_llm_provider: Literal["vertex_ai", "vertex_ai_beta", "gemini"],
         api_base: Optional[str],
@@ -253,7 +261,7 @@ class VertexBase(BaseLLM):

     async def _ensure_access_token_async(
         self,
-        credentials: Optional[str],
+        credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         project_id: Optional[str],
         custom_llm_provider: Literal[
             "vertex_ai", "vertex_ai_beta", "gemini"
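The `VERTEX_CREDENTIALS_TYPES` changes above mean `vertex_credentials` can now be a parsed service-account dict as well as a JSON string or file path. A hedged usage sketch follows; the field values are placeholders, and passing `vertex_credentials` through `litellm.completion` is assumed to behave as it already does for the string form:

```python
import litellm

# Placeholder service-account dict; real values would come from Google Cloud.
service_account = {
    "type": "service_account",
    "project_id": "my-project",
    "private_key_id": "...",
    "private_key": "...",
    "client_email": "sa@my-project.iam.gserviceaccount.com",
    "token_uri": "https://oauth2.googleapis.com/token",
}

response = litellm.completion(
    model="vertex_ai/gemini-1.5-pro",
    messages=[{"role": "user", "content": "hello"}],
    vertex_project="my-project",
    vertex_location="us-central1",
    vertex_credentials=service_account,  # dict accepted after this change; str still works
)
```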
@@ -80,7 +80,7 @@ class IBMWatsonXChatConfig(IBMWatsonXMixin, OpenAIGPTConfig):

     def get_complete_url(
         self,
-        api_base: str,
+        api_base: Optional[str],
         model: str,
         optional_params: dict,
         stream: Optional[bool] = None,
@@ -315,7 +315,7 @@ class IBMWatsonXAIConfig(IBMWatsonXMixin, BaseConfig):

     def get_complete_url(
         self,
-        api_base: str,
+        api_base: Optional[str],
         model: str,
         optional_params: dict,
         stream: Optional[bool] = None,
@@ -94,7 +94,7 @@ from litellm.utils import (
     read_config_args,
     supports_httpx_timeout,
     token_counter,
-    validate_chat_completion_messages,
+    validate_and_fix_openai_messages,
     validate_chat_completion_tool_choice,
 )

@@ -166,6 +166,7 @@ from .llms.vertex_ai.vertex_model_garden.main import VertexAIModelGardenModels
 from .llms.vllm.completion import handler as vllm_handler
 from .llms.watsonx.chat.handler import WatsonXChatHandler
 from .llms.watsonx.common_utils import IBMWatsonXMixin
+from .types.llms.anthropic import AnthropicThinkingParam
 from .types.llms.openai import (
     ChatCompletionAssistantMessage,
     ChatCompletionAudioParam,
@@ -341,6 +342,7 @@ async def acompletion(
     model_list: Optional[list] = None,  # pass in a list of api_base,keys, etc.
     extra_headers: Optional[dict] = None,
     # Optional liteLLM function params
+    thinking: Optional[AnthropicThinkingParam] = None,
     **kwargs,
 ) -> Union[ModelResponse, CustomStreamWrapper]:
     """
@@ -431,6 +433,7 @@ async def acompletion(
         "reasoning_effort": reasoning_effort,
         "extra_headers": extra_headers,
         "acompletion": True,  # assuming this is a required parameter
+        "thinking": thinking,
     }
     if custom_llm_provider is None:
         _, custom_llm_provider, _, _ = get_llm_provider(
@@ -800,6 +803,7 @@ def completion(  # type: ignore # noqa: PLR0915
     api_key: Optional[str] = None,
     model_list: Optional[list] = None,  # pass in a list of api_base,keys, etc.
     # Optional liteLLM function params
+    thinking: Optional[AnthropicThinkingParam] = None,
     **kwargs,
 ) -> Union[ModelResponse, CustomStreamWrapper]:
     """
@@ -851,7 +855,7 @@ def completion(  # type: ignore # noqa: PLR0915
     if model is None:
         raise ValueError("model param not passed in.")
     # validate messages
-    messages = validate_chat_completion_messages(messages=messages)
+    messages = validate_and_fix_openai_messages(messages=messages)
     # validate tool_choice
     tool_choice = validate_chat_completion_tool_choice(tool_choice=tool_choice)
     ######### unpacking kwargs #####################
@@ -1106,6 +1110,7 @@ def completion(  # type: ignore # noqa: PLR0915
             parallel_tool_calls=parallel_tool_calls,
             messages=messages,
             reasoning_effort=reasoning_effort,
+            thinking=thinking,
             **non_default_params,
         )

@@ -3409,6 +3414,7 @@ def embedding(  # noqa: PLR0915
         or custom_llm_provider == "openai"
         or custom_llm_provider == "together_ai"
         or custom_llm_provider == "nvidia_nim"
+        or custom_llm_provider == "litellm_proxy"
     ):
         api_base = (
             api_base
@@ -3485,7 +3491,8 @@ def embedding(  # noqa: PLR0915
         # set API KEY
         if api_key is None:
             api_key = (
-                litellm.api_key
+                api_key
+                or litellm.api_key
                 or litellm.openai_like_key
                 or get_secret_str("OPENAI_LIKE_API_KEY")
             )
@@ -4596,7 +4603,10 @@ def image_generation(  # noqa: PLR0915
             client=client,
             headers=headers,
         )
-    elif custom_llm_provider == "openai":
+    elif (
+        custom_llm_provider == "openai"
+        or custom_llm_provider in litellm.openai_compatible_providers
+    ):
         model_response = openai_chat_completions.image_generation(
             model=model,
             prompt=prompt,
@@ -5042,8 +5052,7 @@ def transcription(
         )
     elif (
         custom_llm_provider == "openai"
-        or custom_llm_provider == "groq"
-        or custom_llm_provider == "fireworks_ai"
+        or custom_llm_provider in litellm.openai_compatible_providers
     ):
         api_base = (
             api_base
@@ -5201,7 +5210,10 @@ def speech(
         custom_llm_provider=custom_llm_provider,
     )
     response: Optional[HttpxBinaryResponseContent] = None
-    if custom_llm_provider == "openai":
+    if (
+        custom_llm_provider == "openai"
+        or custom_llm_provider in litellm.openai_compatible_providers
+    ):
         if voice is None or not (isinstance(voice, str)):
             raise litellm.BadRequestError(
                 message="'voice' is required to be passed as a string for OpenAI TTS",
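A hedged example of the new `thinking` parameter threaded through `completion()`/`acompletion()` above; the `{"type": "enabled", "budget_tokens": ...}` shape is an assumption based on Anthropic's extended-thinking API rather than something stated in this diff:

```python
import litellm

response = litellm.completion(
    model="anthropic/claude-3-7-sonnet-20250219",
    messages=[{"role": "user", "content": "Plan a 3-step refactor of a legacy module."}],
    thinking={"type": "enabled", "budget_tokens": 1024},  # assumed AnthropicThinkingParam shape
)
print(response.choices[0].message.content)
```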
@@ -76,6 +76,44 @@
         "supports_system_messages": true,
         "supports_tool_choice": true
     },
+    "gpt-4.5-preview": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 0.000075,
+        "output_cost_per_token": 0.00015,
+        "input_cost_per_token_batches": 0.0000375,
+        "output_cost_per_token_batches": 0.000075,
+        "cache_read_input_token_cost": 0.0000375,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_response_schema": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true,
+        "supports_system_messages": true,
+        "supports_tool_choice": true
+    },
+    "gpt-4.5-preview-2025-02-27": {
+        "max_tokens": 16384,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 16384,
+        "input_cost_per_token": 0.000075,
+        "output_cost_per_token": 0.00015,
+        "input_cost_per_token_batches": 0.0000375,
+        "output_cost_per_token_batches": 0.000075,
+        "cache_read_input_token_cost": 0.0000375,
+        "litellm_provider": "openai",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_parallel_function_calling": true,
+        "supports_response_schema": true,
+        "supports_vision": true,
+        "supports_prompt_caching": true,
+        "supports_system_messages": true,
+        "supports_tool_choice": true
+    },
     "gpt-4o-audio-preview": {
         "max_tokens": 16384,
         "max_input_tokens": 128000,
@@ -1409,7 +1447,7 @@
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
-        "deprecation_date": "2025-03-31",
+        "deprecation_date": "2025-05-31",
         "supports_tool_choice": true
     },
     "azure/gpt-3.5-turbo-0125": {
@@ -1732,6 +1770,19 @@
         "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice",
         "supports_tool_choice": true
     },
+    "azure_ai/Phi-4": {
+        "max_tokens": 4096,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000000125,
+        "output_cost_per_token": 0.0000005,
+        "litellm_provider": "azure_ai",
+        "mode": "chat",
+        "supports_vision": false,
+        "source": "https://techcommunity.microsoft.com/blog/machinelearningblog/affordable-innovation-unveiling-the-pricing-of-phi-3-slms-on-models-as-a-service/4156495",
+        "supports_function_calling": true,
+        "supports_tool_choice": true
+    },
     "azure_ai/Phi-3.5-mini-instruct": {
         "max_tokens": 4096,
         "max_input_tokens": 128000,
@@ -2731,6 +2782,25 @@
         "supports_tool_choice": true
     },
     "claude-3-5-haiku-20241022": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0000008,
+        "output_cost_per_token": 0.000004,
+        "cache_creation_input_token_cost": 0.000001,
+        "cache_read_input_token_cost": 0.0000008,
+        "litellm_provider": "anthropic",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 264,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true,
+        "supports_response_schema": true,
+        "deprecation_date": "2025-10-01",
+        "supports_tool_choice": true
+    },
+    "claude-3-5-haiku-latest": {
         "max_tokens": 8192,
         "max_input_tokens": 200000,
         "max_output_tokens": 8192,
@@ -2741,6 +2811,7 @@
         "litellm_provider": "anthropic",
         "mode": "chat",
         "supports_function_calling": true,
+        "supports_vision": true,
         "tool_use_system_prompt_tokens": 264,
         "supports_assistant_prefill": true,
         "supports_prompt_caching": true,
@@ -2748,6 +2819,25 @@
         "deprecation_date": "2025-10-01",
         "supports_tool_choice": true
     },
+    "claude-3-opus-latest": {
+        "max_tokens": 4096,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.000015,
+        "output_cost_per_token": 0.000075,
+        "cache_creation_input_token_cost": 0.00001875,
+        "cache_read_input_token_cost": 0.0000015,
+        "litellm_provider": "anthropic",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 395,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true,
+        "supports_response_schema": true,
+        "deprecation_date": "2025-03-01",
+        "supports_tool_choice": true
+    },
     "claude-3-opus-20240229": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,
@@ -2784,6 +2874,25 @@
         "deprecation_date": "2025-07-21",
         "supports_tool_choice": true
     },
+    "claude-3-5-sonnet-latest": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "cache_creation_input_token_cost": 0.00000375,
+        "cache_read_input_token_cost": 0.0000003,
+        "litellm_provider": "anthropic",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 159,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true,
+        "supports_response_schema": true,
+        "deprecation_date": "2025-06-01",
+        "supports_tool_choice": true
+    },
     "claude-3-5-sonnet-20240620": {
         "max_tokens": 8192,
         "max_input_tokens": 200000,
@@ -2803,6 +2912,25 @@
         "deprecation_date": "2025-06-01",
         "supports_tool_choice": true
     },
+    "claude-3-7-sonnet-latest": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "cache_creation_input_token_cost": 0.00000375,
+        "cache_read_input_token_cost": 0.0000003,
+        "litellm_provider": "anthropic",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 159,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true,
+        "supports_response_schema": true,
+        "deprecation_date": "2025-06-01",
+        "supports_tool_choice": true
+    },
     "claude-3-7-sonnet-20250219": {
         "max_tokens": 8192,
         "max_input_tokens": 200000,
@@ -2819,7 +2947,7 @@
         "supports_assistant_prefill": true,
         "supports_prompt_caching": true,
         "supports_response_schema": true,
-        "deprecation_date": "2025-06-01",
+        "deprecation_date": "2026-02-01",
         "supports_tool_choice": true
     },
     "claude-3-5-sonnet-20241022": {
@@ -4074,7 +4202,7 @@
         "supports_assistant_prefill": true,
         "supports_tool_choice": true
     },
-    "vertex_ai/claude-3-7-sonnet-20250219": {
+    "vertex_ai/claude-3-7-sonnet@20250219": {
         "max_tokens": 8192,
         "max_input_tokens": 200000,
         "max_output_tokens": 8192,
@@ -5495,6 +5623,35 @@
         "tool_use_system_prompt_tokens": 159,
         "supports_tool_choice": true
     },
+    "openrouter/anthropic/claude-3.7-sonnet": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "input_cost_per_image": 0.0048,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 159,
+        "supports_assistant_prefill": true,
+        "supports_tool_choice": true
+    },
+    "openrouter/anthropic/claude-3.7-sonnet:beta": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "input_cost_per_image": 0.0048,
+        "litellm_provider": "openrouter",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 159,
+        "supports_tool_choice": true
+    },
     "openrouter/anthropic/claude-3-sonnet": {
         "max_tokens": 200000,
         "input_cost_per_token": 0.000003,
@@ -6468,6 +6625,21 @@
         "supports_response_schema": true,
         "supports_tool_choice": true
     },
+    "us.anthropic.claude-3-7-sonnet-20250219-v1:0": {
+        "max_tokens": 8192,
+        "max_input_tokens": 200000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000015,
+        "litellm_provider": "bedrock_converse",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_assistant_prefill": true,
+        "supports_prompt_caching": true,
+        "supports_response_schema": true,
+        "supports_tool_choice": true
+    },
     "us.anthropic.claude-3-haiku-20240307-v1:0": {
         "max_tokens": 4096,
         "max_input_tokens": 200000,
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/2a6af5dc23d92a9a.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[35319,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"441\",\"static/chunks/441-79926bf2b9d89e04.js\",\"261\",\"static/chunks/261-cb27c20c4f8ec4c6.js\",\"899\",\"static/chunks/899-354f59ecde307dfa.js\",\"678\",\"static/chunks/678-58bcfc3337902198.js\",\"250\",\"static/chunks/250-fd088aaa064b7d46.js\",\"699\",\"static/chunks/699-a194d60126b95923.js\",\"931\",\"static/chunks/app/page-84c68f24f2d4d77b.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"Z74g7wOKfx1z1d_BuB0ip\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/2a6af5dc23d92a9a.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/f41c66e22715ab00.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[89076,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"441\",\"static/chunks/441-79926bf2b9d89e04.js\",\"261\",\"static/chunks/261-cb27c20c4f8ec4c6.js\",\"899\",\"static/chunks/899-354f59ecde307dfa.js\",\"914\",\"static/chunks/914-000d10374f86fc1a.js\",\"250\",\"static/chunks/250-8b26aa68cd90cbb2.js\",\"699\",\"static/chunks/699-6b82f8e7b98ca1a3.js\",\"931\",\"static/chunks/app/page-fbe63e2a496641d2.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"8I5x-IqExlZLRs0oeiz6b\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f41c66e22715ab00.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
||||||
2:I[19107,[],"ClientPageRoot"]
|
2:I[19107,[],"ClientPageRoot"]
|
||||||
3:I[35319,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","899","static/chunks/899-354f59ecde307dfa.js","678","static/chunks/678-58bcfc3337902198.js","250","static/chunks/250-fd088aaa064b7d46.js","699","static/chunks/699-a194d60126b95923.js","931","static/chunks/app/page-84c68f24f2d4d77b.js"],"default",1]
|
3:I[89076,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","899","static/chunks/899-354f59ecde307dfa.js","914","static/chunks/914-000d10374f86fc1a.js","250","static/chunks/250-8b26aa68cd90cbb2.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","931","static/chunks/app/page-fbe63e2a496641d2.js"],"default",1]
|
||||||
4:I[4707,[],""]
|
4:I[4707,[],""]
|
||||||
5:I[36423,[],""]
|
5:I[36423,[],""]
|
||||||
0:["Z74g7wOKfx1z1d_BuB0ip",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/2a6af5dc23d92a9a.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
|
0:["8I5x-IqExlZLRs0oeiz6b",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
|
||||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||||
1:null
|
1:null
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
2:I[19107,[],"ClientPageRoot"]
|
2:I[19107,[],"ClientPageRoot"]
|
||||||
3:I[52829,["441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","250","static/chunks/250-fd088aaa064b7d46.js","699","static/chunks/699-a194d60126b95923.js","418","static/chunks/app/model_hub/page-6f97b95f1023b0e9.js"],"default",1]
|
3:I[52829,["441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","250","static/chunks/250-8b26aa68cd90cbb2.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","418","static/chunks/app/model_hub/page-6f97b95f1023b0e9.js"],"default",1]
|
||||||
4:I[4707,[],""]
|
4:I[4707,[],""]
|
||||||
5:I[36423,[],""]
|
5:I[36423,[],""]
|
||||||
0:["Z74g7wOKfx1z1d_BuB0ip",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/2a6af5dc23d92a9a.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
|
0:["8I5x-IqExlZLRs0oeiz6b",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
|
||||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||||
1:null
|
1:null
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1,7 +1,7 @@
|
||||||
2:I[19107,[],"ClientPageRoot"]
|
2:I[19107,[],"ClientPageRoot"]
|
||||||
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","441","static/chunks/441-79926bf2b9d89e04.js","899","static/chunks/899-354f59ecde307dfa.js","250","static/chunks/250-fd088aaa064b7d46.js","461","static/chunks/app/onboarding/page-801b31bb95fa3d1c.js"],"default",1]
|
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","441","static/chunks/441-79926bf2b9d89e04.js","899","static/chunks/899-354f59ecde307dfa.js","250","static/chunks/250-8b26aa68cd90cbb2.js","461","static/chunks/app/onboarding/page-f2e9aa9e77b66520.js"],"default",1]
|
||||||
4:I[4707,[],""]
|
4:I[4707,[],""]
|
||||||
5:I[36423,[],""]
|
5:I[36423,[],""]
|
||||||
0:["Z74g7wOKfx1z1d_BuB0ip",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/2a6af5dc23d92a9a.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
|
0:["8I5x-IqExlZLRs0oeiz6b",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
|
||||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||||
1:null
|
1:null
|
||||||
|
|
|
@ -1,9 +1,5 @@
|
||||||
model_list:
|
model_list:
|
||||||
- model_name: anthropic/claude-3-7-sonnet-20250219
|
- model_name: claude-3.7
|
||||||
litellm_params:
|
|
||||||
model: anthropic/claude-3-7-sonnet-20250219
|
|
||||||
api_key: os.environ/ANTHROPIC_API_KEY
|
|
||||||
- model_name: gpt-4
|
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: openai/gpt-3.5-turbo
|
model: openai/gpt-3.5-turbo
|
||||||
api_key: os.environ/OPENAI_API_KEY
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
|
@ -14,3 +10,19 @@ model_list:
|
||||||
- model_name: deepseek-r1-api
|
- model_name: deepseek-r1-api
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: deepseek/deepseek-reasoner
|
model: deepseek/deepseek-reasoner
|
||||||
|
- model_name: cohere.embed-english-v3
|
||||||
|
litellm_params:
|
||||||
|
model: bedrock/cohere.embed-english-v3
|
||||||
|
api_key: os.environ/COHERE_API_KEY
|
||||||
|
- model_name: bedrock-claude-3-7
|
||||||
|
litellm_params:
|
||||||
|
model: bedrock/invoke/us.anthropic.claude-3-7-sonnet-20250219-v1:0
|
||||||
|
- model_name: bedrock-claude-3-5-sonnet
|
||||||
|
litellm_params:
|
||||||
|
model: bedrock/invoke/us.anthropic.claude-3-5-sonnet-20240620-v1:0
|
||||||
|
- model_name: bedrock-nova
|
||||||
|
litellm_params:
|
||||||
|
model: bedrock/us.amazon.nova-pro-v1:0
|
||||||
|
|
||||||
|
litellm_settings:
|
||||||
|
callbacks: ["langfuse"]
|
|
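The config hunk above adds Bedrock invoke-route chat aliases, a Bedrock-hosted Cohere embedding alias, and a `langfuse` callback. As a rough illustration of how these aliases are consumed, here is a hedged sketch using the OpenAI Python client against a locally running proxy; the base URL and virtual key are placeholders, not values from this commit.

```python
# Minimal sketch: call the new config aliases through the proxy.
# Assumes the proxy listens on http://localhost:4000 with a valid virtual key.
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

# Chat completion against the Bedrock invoke-route alias added above
resp = client.chat.completions.create(
    model="bedrock-claude-3-7",
    messages=[{"role": "user", "content": "hello"}],
)
print(resp.choices[0].message.content)

# Embedding against the new Bedrock Cohere embedding alias
emb = client.embeddings.create(
    model="cohere.embed-english-v3",
    input=["hello world"],
)
print(len(emb.data[0].embedding))
```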
@ -26,6 +26,8 @@ from litellm.types.utils import (
|
||||||
ModelResponse,
|
ModelResponse,
|
||||||
ProviderField,
|
ProviderField,
|
||||||
StandardCallbackDynamicParams,
|
StandardCallbackDynamicParams,
|
||||||
|
StandardLoggingPayloadErrorInformation,
|
||||||
|
StandardLoggingPayloadStatus,
|
||||||
StandardPassThroughResponseObject,
|
StandardPassThroughResponseObject,
|
||||||
TextCompletionResponse,
|
TextCompletionResponse,
|
||||||
)
|
)
|
||||||
|
@ -610,6 +612,8 @@ class GenerateKeyResponse(KeyRequestBase):
|
||||||
token_id: Optional[str] = None
|
token_id: Optional[str] = None
|
||||||
litellm_budget_table: Optional[Any] = None
|
litellm_budget_table: Optional[Any] = None
|
||||||
token: Optional[str] = None
|
token: Optional[str] = None
|
||||||
|
created_by: Optional[str] = None
|
||||||
|
updated_by: Optional[str] = None
|
||||||
|
|
||||||
@model_validator(mode="before")
|
@model_validator(mode="before")
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -1387,7 +1391,9 @@ class LiteLLM_VerificationToken(LiteLLMPydanticObjectBase):
|
||||||
litellm_budget_table: Optional[dict] = None
|
litellm_budget_table: Optional[dict] = None
|
||||||
org_id: Optional[str] = None # org id for a given key
|
org_id: Optional[str] = None # org id for a given key
|
||||||
created_at: Optional[datetime] = None
|
created_at: Optional[datetime] = None
|
||||||
|
created_by: Optional[str] = None
|
||||||
updated_at: Optional[datetime] = None
|
updated_at: Optional[datetime] = None
|
||||||
|
updated_by: Optional[str] = None
|
||||||
|
|
||||||
model_config = ConfigDict(protected_namespaces=())
|
model_config = ConfigDict(protected_namespaces=())
|
||||||
|
|
||||||
|
@ -1574,6 +1580,10 @@ class LiteLLM_UserTableFiltered(BaseModel): # done to avoid exposing sensitive
|
||||||
user_email: str
|
user_email: str
|
||||||
|
|
||||||
|
|
||||||
|
class LiteLLM_UserTableWithKeyCount(LiteLLM_UserTable):
|
||||||
|
key_count: int = 0
|
||||||
|
|
||||||
|
|
||||||
class LiteLLM_EndUserTable(LiteLLMPydanticObjectBase):
|
class LiteLLM_EndUserTable(LiteLLMPydanticObjectBase):
|
||||||
user_id: str
|
user_id: str
|
||||||
blocked: bool
|
blocked: bool
|
||||||
|
@ -1704,6 +1714,7 @@ class WebhookEvent(CallInfo):
|
||||||
class SpecialModelNames(enum.Enum):
|
class SpecialModelNames(enum.Enum):
|
||||||
all_team_models = "all-team-models"
|
all_team_models = "all-team-models"
|
||||||
all_proxy_models = "all-proxy-models"
|
all_proxy_models = "all-proxy-models"
|
||||||
|
no_default_models = "no-default-models"
|
||||||
|
|
||||||
|
|
||||||
class InvitationNew(LiteLLMPydanticObjectBase):
|
class InvitationNew(LiteLLMPydanticObjectBase):
|
||||||
|
@ -1846,6 +1857,9 @@ class SpendLogsMetadata(TypedDict):
|
||||||
] # special param to log k,v pairs to spendlogs for a call
|
] # special param to log k,v pairs to spendlogs for a call
|
||||||
requester_ip_address: Optional[str]
|
requester_ip_address: Optional[str]
|
||||||
applied_guardrails: Optional[List[str]]
|
applied_guardrails: Optional[List[str]]
|
||||||
|
status: StandardLoggingPayloadStatus
|
||||||
|
proxy_server_request: Optional[str]
|
||||||
|
error_information: Optional[StandardLoggingPayloadErrorInformation]
|
||||||
|
|
||||||
|
|
||||||
class SpendLogsPayload(TypedDict):
|
class SpendLogsPayload(TypedDict):
|
||||||
|
|
|
@ -1116,6 +1116,14 @@ async def can_user_call_model(
|
||||||
if user_object is None:
|
if user_object is None:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
if SpecialModelNames.no_default_models.value in user_object.models:
|
||||||
|
raise ProxyException(
|
||||||
|
message=f"User not allowed to access model. No default model access, only team models allowed. Tried to access {model}",
|
||||||
|
type=ProxyErrorTypes.key_model_access_denied,
|
||||||
|
param="model",
|
||||||
|
code=status.HTTP_401_UNAUTHORIZED,
|
||||||
|
)
|
||||||
|
|
||||||
return await _can_object_call_model(
|
return await _can_object_call_model(
|
||||||
model=model,
|
model=model,
|
||||||
llm_router=llm_router,
|
llm_router=llm_router,
|
||||||
|
|
|
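The `can_user_call_model` change above makes the new `no-default-models` special value enforceable: a user carrying it can only reach models granted through a team. A hedged sketch of how an admin would apply it via the management API follows; the proxy URL and master key are placeholders.

```python
# Hedged sketch: restrict a user to team-only model access with the new
# "no-default-models" special value. URL and master key are placeholders.
import requests

PROXY = "http://localhost:4000"
HEADERS = {"Authorization": "Bearer sk-master-key"}

# Create a user with no personal/default model access; they can only use
# models granted through a team.
resp = requests.post(
    f"{PROXY}/user/new",
    headers=HEADERS,
    json={"user_email": "dev@example.com", "models": ["no-default-models"]},
)
print(resp.json())

# A direct call with this user's key to a non-team model is now rejected by
# can_user_call_model() with a 401 key_model_access_denied ProxyException.
```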
@ -1,5 +1,6 @@
|
||||||
# What is this?
|
# What is this?
|
||||||
## Common checks for /v1/models and `/model/info`
|
## Common checks for /v1/models and `/model/info`
|
||||||
|
import copy
|
||||||
from typing import Dict, List, Optional, Set
|
from typing import Dict, List, Optional, Set
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
|
@ -30,7 +31,7 @@ def get_provider_models(provider: str) -> Optional[List[str]]:
|
||||||
return get_valid_models()
|
return get_valid_models()
|
||||||
|
|
||||||
if provider in litellm.models_by_provider:
|
if provider in litellm.models_by_provider:
|
||||||
provider_models = litellm.models_by_provider[provider]
|
provider_models = copy.deepcopy(litellm.models_by_provider[provider])
|
||||||
for idx, _model in enumerate(provider_models):
|
for idx, _model in enumerate(provider_models):
|
||||||
if provider not in _model:
|
if provider not in _model:
|
||||||
provider_models[idx] = f"{provider}/{_model}"
|
provider_models[idx] = f"{provider}/{_model}"
|
||||||
|
|
|
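The `copy.deepcopy` in `get_provider_models` matters because the loop rewrites list entries in place. Without the copy, the shared `litellm.models_by_provider` registry would be mutated on the first call and double-prefixed on later ones. A standalone sketch with a toy registry (not the real litellm mapping) illustrates the behavior:

```python
# Why the deepcopy matters: the in-place prefixing below must not touch the
# shared registry. Toy registry used for illustration only.
import copy

models_by_provider = {"groq": ["llama-3.1-8b-instant"]}

def get_provider_models(provider: str) -> list[str]:
    provider_models = copy.deepcopy(models_by_provider[provider])
    for idx, _model in enumerate(provider_models):
        if provider not in _model:
            provider_models[idx] = f"{provider}/{_model}"
    return provider_models

print(get_provider_models("groq"))   # ['groq/llama-3.1-8b-instant']
print(models_by_provider["groq"])    # ['llama-3.1-8b-instant'] - registry untouched
```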
@ -240,3 +240,18 @@ class RouteChecks:
|
||||||
RouteChecks._route_matches_pattern(route=route, pattern=allowed_route)
|
RouteChecks._route_matches_pattern(route=route, pattern=allowed_route)
|
||||||
for allowed_route in allowed_routes
|
for allowed_route in allowed_routes
|
||||||
) # Check pattern match
|
) # Check pattern match
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _is_assistants_api_request(request: Request) -> bool:
|
||||||
|
"""
|
||||||
|
Returns True if `thread` or `assistant` is in the request path
|
||||||
|
|
||||||
|
Args:
|
||||||
|
request (Request): The request object
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if `thread` or `assistant` is in the request path, False otherwise
|
||||||
|
"""
|
||||||
|
if "thread" in request.url.path or "assistant" in request.url.path:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
|
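The new `RouteChecks._is_assistants_api_request` helper is a plain substring test on the request path. A hedged sketch of exercising it with a bare Starlette request, assuming litellm from this branch is importable; otherwise the same check is just `"thread" in path or "assistant" in path`.

```python
# Build a minimal ASGI Request and check whether it targets the Assistants API.
from starlette.requests import Request

from litellm.proxy.auth.route_checks import RouteChecks

def make_request(path: str) -> Request:
    # Minimal ASGI scope - only the fields Request.url needs.
    return Request({"type": "http", "method": "POST", "path": path,
                    "query_string": b"", "headers": []})

print(RouteChecks._is_assistants_api_request(make_request("/v1/threads/thread_123/runs")))  # True
print(RouteChecks._is_assistants_api_request(make_request("/v1/chat/completions")))         # False
```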
@ -8,6 +8,7 @@ Returns a UserAPIKeyAuth object if the API key is valid
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import re
|
||||||
import secrets
|
import secrets
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Optional, cast
|
from typing import Optional, cast
|
||||||
|
@ -279,6 +280,21 @@ def get_rbac_role(jwt_handler: JWTHandler, scopes: List[str]) -> str:
|
||||||
return LitellmUserRoles.TEAM
|
return LitellmUserRoles.TEAM
|
||||||
|
|
||||||
|
|
||||||
|
def get_model_from_request(request_data: dict, route: str) -> Optional[str]:
|
||||||
|
|
||||||
|
# First try to get model from request_data
|
||||||
|
model = request_data.get("model")
|
||||||
|
|
||||||
|
# If model not in request_data, try to extract from route
|
||||||
|
if model is None:
|
||||||
|
# Parse model from route that follows the pattern /openai/deployments/{model}/*
|
||||||
|
match = re.match(r"/openai/deployments/([^/]+)", route)
|
||||||
|
if match:
|
||||||
|
model = match.group(1)
|
||||||
|
|
||||||
|
return model
|
||||||
|
|
||||||
|
|
||||||
async def _user_api_key_auth_builder( # noqa: PLR0915
|
async def _user_api_key_auth_builder( # noqa: PLR0915
|
||||||
request: Request,
|
request: Request,
|
||||||
api_key: str,
|
api_key: str,
|
||||||
|
@ -807,7 +823,7 @@ async def _user_api_key_auth_builder( # noqa: PLR0915
|
||||||
# the validation will occur when checking the team has access to this model
|
# the validation will occur when checking the team has access to this model
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
model = request_data.get("model", None)
|
model = get_model_from_request(request_data, route)
|
||||||
fallback_models = cast(
|
fallback_models = cast(
|
||||||
Optional[List[ALL_FALLBACK_MODEL_VALUES]],
|
Optional[List[ALL_FALLBACK_MODEL_VALUES]],
|
||||||
request_data.get("fallbacks", None),
|
request_data.get("fallbacks", None),
|
||||||
|
|
|
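The new `get_model_from_request` helper changes how auth resolves the model: prefer the request body, then fall back to Azure-style `/openai/deployments/{model}/...` routes. A self-contained sketch mirroring the logic from the hunk above:

```python
# Model resolution rule added in this commit: body first, then the
# /openai/deployments/{model}/... route pattern.
import re
from typing import Optional

def get_model_from_request(request_data: dict, route: str) -> Optional[str]:
    model = request_data.get("model")
    if model is None:
        match = re.match(r"/openai/deployments/([^/]+)", route)
        if match:
            model = match.group(1)
    return model

print(get_model_from_request({}, "/openai/deployments/gpt-4o-mini/chat/completions"))  # gpt-4o-mini
print(get_model_from_request({"model": "claude-3.7"}, "/v1/chat/completions"))          # claude-3.7
```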
@ -42,7 +42,26 @@ async def _read_request_body(request: Optional[Request]) -> Dict:
|
||||||
if not body:
|
if not body:
|
||||||
parsed_body = {}
|
parsed_body = {}
|
||||||
else:
|
else:
|
||||||
parsed_body = orjson.loads(body)
|
try:
|
||||||
|
parsed_body = orjson.loads(body)
|
||||||
|
except orjson.JSONDecodeError:
|
||||||
|
# Fall back to the standard json module which is more forgiving
|
||||||
|
# First decode bytes to string if needed
|
||||||
|
body_str = body.decode("utf-8") if isinstance(body, bytes) else body
|
||||||
|
|
||||||
|
# Replace invalid surrogate pairs
|
||||||
|
import re
|
||||||
|
|
||||||
|
# This regex finds incomplete surrogate pairs
|
||||||
|
body_str = re.sub(
|
||||||
|
r"[\uD800-\uDBFF](?![\uDC00-\uDFFF])", "", body_str
|
||||||
|
)
|
||||||
|
# This regex finds low surrogates without high surrogates
|
||||||
|
body_str = re.sub(
|
||||||
|
r"(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]", "", body_str
|
||||||
|
)
|
||||||
|
|
||||||
|
parsed_body = json.loads(body_str)
|
||||||
|
|
||||||
# Cache the parsed result
|
# Cache the parsed result
|
||||||
_safe_set_request_parsed_body(request=request, parsed_body=parsed_body)
|
_safe_set_request_parsed_body(request=request, parsed_body=parsed_body)
|
||||||
|
@ -62,8 +81,8 @@ async def _read_request_body(request: Optional[Request]) -> Dict:
|
||||||
def _safe_get_request_parsed_body(request: Optional[Request]) -> Optional[dict]:
|
def _safe_get_request_parsed_body(request: Optional[Request]) -> Optional[dict]:
|
||||||
if request is None:
|
if request is None:
|
||||||
return None
|
return None
|
||||||
if hasattr(request, "state") and hasattr(request.state, "parsed_body"):
|
if hasattr(request, "scope") and "parsed_body" in request.scope:
|
||||||
return request.state.parsed_body
|
return request.scope["parsed_body"]
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@ -74,7 +93,7 @@ def _safe_set_request_parsed_body(
|
||||||
try:
|
try:
|
||||||
if request is None:
|
if request is None:
|
||||||
return
|
return
|
||||||
request.state.parsed_body = parsed_body
|
request.scope["parsed_body"] = parsed_body
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.debug(
|
verbose_proxy_logger.debug(
|
||||||
"Unexpected error setting request parsed body - {}".format(e)
|
"Unexpected error setting request parsed body - {}".format(e)
|
||||||
|
|
|
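The `_read_request_body` fallback above copes with request bodies that `orjson` rejects: it strips unpaired UTF-16 surrogates and retries with the standard `json` module. A standalone sketch of just the cleanup step, using the same two regexes on a made-up payload with a truncated emoji:

```python
# Strip unpaired surrogates so json.loads can parse what orjson rejected.
import json
import re

def strip_unpaired_surrogates(body_str: str) -> str:
    body_str = re.sub(r"[\uD800-\uDBFF](?![\uDC00-\uDFFF])", "", body_str)  # lone high surrogates
    body_str = re.sub(r"(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]", "", body_str)  # lone low surrogates
    return body_str

raw = '{"messages": [{"role": "user", "content": "hi \ud83d"}]}'  # truncated emoji -> lone high surrogate
print(json.loads(strip_unpaired_surrogates(raw))["messages"][0]["content"])  # "hi "
```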
@ -64,10 +64,10 @@ def log_db_metrics(func):
|
||||||
# in litellm custom callbacks kwargs is passed as arg[0]
|
# in litellm custom callbacks kwargs is passed as arg[0]
|
||||||
# https://docs.litellm.ai/docs/observability/custom_callback#callback-functions
|
# https://docs.litellm.ai/docs/observability/custom_callback#callback-functions
|
||||||
args is not None
|
args is not None
|
||||||
and len(args) > 0
|
and len(args) > 1
|
||||||
and isinstance(args[0], dict)
|
and isinstance(args[1], dict)
|
||||||
):
|
):
|
||||||
passed_kwargs = args[0]
|
passed_kwargs = args[1]
|
||||||
parent_otel_span = _get_parent_otel_span_from_kwargs(
|
parent_otel_span = _get_parent_otel_span_from_kwargs(
|
||||||
kwargs=passed_kwargs
|
kwargs=passed_kwargs
|
||||||
)
|
)
|
||||||
|
|
|
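The index shift from `args[0]` to `args[1]` in `log_db_metrics` follows from the refactor below: the decorated cost-tracking callback becomes a method, so `self` occupies `args[0]` and the litellm kwargs dict moves to `args[1]`. A minimal illustration with a stand-in decorator and class (not the real `log_db_metrics` or `_ProxyDBLogger`):

```python
# Stand-in showing where the kwargs dict lands once the callback is a method.
import asyncio
import functools

def demo_db_metrics(func):                      # stand-in for log_db_metrics
    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        if len(args) > 1 and isinstance(args[1], dict):
            print("kwargs dict found at args[1]:", args[1]["litellm_call_id"])
        return await func(*args, **kwargs)
    return wrapper

class DemoLogger:                               # stand-in for _ProxyDBLogger
    @demo_db_metrics
    async def track_cost(self, kwargs, completion_response=None):
        return kwargs

asyncio.run(DemoLogger().track_cost({"litellm_call_id": "abc-123"}))
```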
@ -1,138 +1,206 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
import traceback
|
import traceback
|
||||||
from typing import Optional, Union, cast
|
from datetime import datetime
|
||||||
|
from typing import Any, Optional, Union, cast
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
from litellm._logging import verbose_proxy_logger
|
from litellm._logging import verbose_proxy_logger
|
||||||
|
from litellm.integrations.custom_logger import CustomLogger
|
||||||
from litellm.litellm_core_utils.core_helpers import (
|
from litellm.litellm_core_utils.core_helpers import (
|
||||||
_get_parent_otel_span_from_kwargs,
|
_get_parent_otel_span_from_kwargs,
|
||||||
get_litellm_metadata_from_kwargs,
|
get_litellm_metadata_from_kwargs,
|
||||||
)
|
)
|
||||||
|
from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup
|
||||||
|
from litellm.proxy._types import UserAPIKeyAuth
|
||||||
from litellm.proxy.auth.auth_checks import log_db_metrics
|
from litellm.proxy.auth.auth_checks import log_db_metrics
|
||||||
from litellm.types.utils import StandardLoggingPayload
|
from litellm.types.utils import (
|
||||||
|
StandardLoggingPayload,
|
||||||
|
StandardLoggingUserAPIKeyMetadata,
|
||||||
|
)
|
||||||
from litellm.utils import get_end_user_id_for_cost_tracking
|
from litellm.utils import get_end_user_id_for_cost_tracking
|
||||||
|
|
||||||
|
|
||||||
@log_db_metrics
|
class _ProxyDBLogger(CustomLogger):
|
||||||
async def _PROXY_track_cost_callback(
|
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
|
||||||
kwargs, # kwargs to completion
|
await self._PROXY_track_cost_callback(
|
||||||
completion_response: litellm.ModelResponse, # response from completion
|
kwargs, response_obj, start_time, end_time
|
||||||
start_time=None,
|
|
||||||
end_time=None, # start/end time for completion
|
|
||||||
):
|
|
||||||
from litellm.proxy.proxy_server import (
|
|
||||||
prisma_client,
|
|
||||||
proxy_logging_obj,
|
|
||||||
update_cache,
|
|
||||||
update_database,
|
|
||||||
)
|
|
||||||
|
|
||||||
verbose_proxy_logger.debug("INSIDE _PROXY_track_cost_callback")
|
|
||||||
try:
|
|
||||||
verbose_proxy_logger.debug(
|
|
||||||
f"kwargs stream: {kwargs.get('stream', None)} + complete streaming response: {kwargs.get('complete_streaming_response', None)}"
|
|
||||||
)
|
|
||||||
parent_otel_span = _get_parent_otel_span_from_kwargs(kwargs=kwargs)
|
|
||||||
litellm_params = kwargs.get("litellm_params", {}) or {}
|
|
||||||
end_user_id = get_end_user_id_for_cost_tracking(litellm_params)
|
|
||||||
metadata = get_litellm_metadata_from_kwargs(kwargs=kwargs)
|
|
||||||
user_id = cast(Optional[str], metadata.get("user_api_key_user_id", None))
|
|
||||||
team_id = cast(Optional[str], metadata.get("user_api_key_team_id", None))
|
|
||||||
org_id = cast(Optional[str], metadata.get("user_api_key_org_id", None))
|
|
||||||
key_alias = cast(Optional[str], metadata.get("user_api_key_alias", None))
|
|
||||||
end_user_max_budget = metadata.get("user_api_end_user_max_budget", None)
|
|
||||||
sl_object: Optional[StandardLoggingPayload] = kwargs.get(
|
|
||||||
"standard_logging_object", None
|
|
||||||
)
|
|
||||||
response_cost = (
|
|
||||||
sl_object.get("response_cost", None)
|
|
||||||
if sl_object is not None
|
|
||||||
else kwargs.get("response_cost", None)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if response_cost is not None:
|
async def async_post_call_failure_hook(
|
||||||
user_api_key = metadata.get("user_api_key", None)
|
self,
|
||||||
if kwargs.get("cache_hit", False) is True:
|
request_data: dict,
|
||||||
response_cost = 0.0
|
original_exception: Exception,
|
||||||
verbose_proxy_logger.info(
|
user_api_key_dict: UserAPIKeyAuth,
|
||||||
f"Cache Hit: response_cost {response_cost}, for user_id {user_id}"
|
):
|
||||||
)
|
from litellm.proxy.proxy_server import update_database
|
||||||
|
|
||||||
verbose_proxy_logger.debug(
|
_metadata = dict(
|
||||||
f"user_api_key {user_api_key}, prisma_client: {prisma_client}"
|
StandardLoggingUserAPIKeyMetadata(
|
||||||
|
user_api_key_hash=user_api_key_dict.api_key,
|
||||||
|
user_api_key_alias=user_api_key_dict.key_alias,
|
||||||
|
user_api_key_user_email=user_api_key_dict.user_email,
|
||||||
|
user_api_key_user_id=user_api_key_dict.user_id,
|
||||||
|
user_api_key_team_id=user_api_key_dict.team_id,
|
||||||
|
user_api_key_org_id=user_api_key_dict.org_id,
|
||||||
|
user_api_key_team_alias=user_api_key_dict.team_alias,
|
||||||
|
user_api_key_end_user_id=user_api_key_dict.end_user_id,
|
||||||
)
|
)
|
||||||
if _should_track_cost_callback(
|
)
|
||||||
user_api_key=user_api_key,
|
_metadata["user_api_key"] = user_api_key_dict.api_key
|
||||||
user_id=user_id,
|
_metadata["status"] = "failure"
|
||||||
team_id=team_id,
|
_metadata["error_information"] = (
|
||||||
end_user_id=end_user_id,
|
StandardLoggingPayloadSetup.get_error_information(
|
||||||
):
|
original_exception=original_exception,
|
||||||
## UPDATE DATABASE
|
)
|
||||||
await update_database(
|
)
|
||||||
token=user_api_key,
|
|
||||||
response_cost=response_cost,
|
|
||||||
user_id=user_id,
|
|
||||||
end_user_id=end_user_id,
|
|
||||||
team_id=team_id,
|
|
||||||
kwargs=kwargs,
|
|
||||||
completion_response=completion_response,
|
|
||||||
start_time=start_time,
|
|
||||||
end_time=end_time,
|
|
||||||
org_id=org_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
# update cache
|
existing_metadata: dict = request_data.get("metadata", None) or {}
|
||||||
asyncio.create_task(
|
existing_metadata.update(_metadata)
|
||||||
update_cache(
|
|
||||||
|
if "litellm_params" not in request_data:
|
||||||
|
request_data["litellm_params"] = {}
|
||||||
|
request_data["litellm_params"]["proxy_server_request"] = (
|
||||||
|
request_data.get("proxy_server_request") or {}
|
||||||
|
)
|
||||||
|
request_data["litellm_params"]["metadata"] = existing_metadata
|
||||||
|
await update_database(
|
||||||
|
token=user_api_key_dict.api_key,
|
||||||
|
response_cost=0.0,
|
||||||
|
user_id=user_api_key_dict.user_id,
|
||||||
|
end_user_id=user_api_key_dict.end_user_id,
|
||||||
|
team_id=user_api_key_dict.team_id,
|
||||||
|
kwargs=request_data,
|
||||||
|
completion_response=original_exception,
|
||||||
|
start_time=datetime.now(),
|
||||||
|
end_time=datetime.now(),
|
||||||
|
org_id=user_api_key_dict.org_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
@log_db_metrics
|
||||||
|
async def _PROXY_track_cost_callback(
|
||||||
|
self,
|
||||||
|
kwargs, # kwargs to completion
|
||||||
|
completion_response: Optional[
|
||||||
|
Union[litellm.ModelResponse, Any]
|
||||||
|
], # response from completion
|
||||||
|
start_time=None,
|
||||||
|
end_time=None, # start/end time for completion
|
||||||
|
):
|
||||||
|
from litellm.proxy.proxy_server import (
|
||||||
|
prisma_client,
|
||||||
|
proxy_logging_obj,
|
||||||
|
update_cache,
|
||||||
|
update_database,
|
||||||
|
)
|
||||||
|
|
||||||
|
verbose_proxy_logger.debug("INSIDE _PROXY_track_cost_callback")
|
||||||
|
try:
|
||||||
|
verbose_proxy_logger.debug(
|
||||||
|
f"kwargs stream: {kwargs.get('stream', None)} + complete streaming response: {kwargs.get('complete_streaming_response', None)}"
|
||||||
|
)
|
||||||
|
parent_otel_span = _get_parent_otel_span_from_kwargs(kwargs=kwargs)
|
||||||
|
litellm_params = kwargs.get("litellm_params", {}) or {}
|
||||||
|
end_user_id = get_end_user_id_for_cost_tracking(litellm_params)
|
||||||
|
metadata = get_litellm_metadata_from_kwargs(kwargs=kwargs)
|
||||||
|
user_id = cast(Optional[str], metadata.get("user_api_key_user_id", None))
|
||||||
|
team_id = cast(Optional[str], metadata.get("user_api_key_team_id", None))
|
||||||
|
org_id = cast(Optional[str], metadata.get("user_api_key_org_id", None))
|
||||||
|
key_alias = cast(Optional[str], metadata.get("user_api_key_alias", None))
|
||||||
|
end_user_max_budget = metadata.get("user_api_end_user_max_budget", None)
|
||||||
|
sl_object: Optional[StandardLoggingPayload] = kwargs.get(
|
||||||
|
"standard_logging_object", None
|
||||||
|
)
|
||||||
|
response_cost = (
|
||||||
|
sl_object.get("response_cost", None)
|
||||||
|
if sl_object is not None
|
||||||
|
else kwargs.get("response_cost", None)
|
||||||
|
)
|
||||||
|
|
||||||
|
if response_cost is not None:
|
||||||
|
user_api_key = metadata.get("user_api_key", None)
|
||||||
|
if kwargs.get("cache_hit", False) is True:
|
||||||
|
response_cost = 0.0
|
||||||
|
verbose_proxy_logger.info(
|
||||||
|
f"Cache Hit: response_cost {response_cost}, for user_id {user_id}"
|
||||||
|
)
|
||||||
|
|
||||||
|
verbose_proxy_logger.debug(
|
||||||
|
f"user_api_key {user_api_key}, prisma_client: {prisma_client}"
|
||||||
|
)
|
||||||
|
if _should_track_cost_callback(
|
||||||
|
user_api_key=user_api_key,
|
||||||
|
user_id=user_id,
|
||||||
|
team_id=team_id,
|
||||||
|
end_user_id=end_user_id,
|
||||||
|
):
|
||||||
|
## UPDATE DATABASE
|
||||||
|
await update_database(
|
||||||
token=user_api_key,
|
token=user_api_key,
|
||||||
|
response_cost=response_cost,
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
end_user_id=end_user_id,
|
end_user_id=end_user_id,
|
||||||
response_cost=response_cost,
|
|
||||||
team_id=team_id,
|
team_id=team_id,
|
||||||
parent_otel_span=parent_otel_span,
|
kwargs=kwargs,
|
||||||
|
completion_response=completion_response,
|
||||||
|
start_time=start_time,
|
||||||
|
end_time=end_time,
|
||||||
|
org_id=org_id,
|
||||||
)
|
)
|
||||||
)
|
|
||||||
|
|
||||||
await proxy_logging_obj.slack_alerting_instance.customer_spend_alert(
|
# update cache
|
||||||
token=user_api_key,
|
asyncio.create_task(
|
||||||
key_alias=key_alias,
|
update_cache(
|
||||||
end_user_id=end_user_id,
|
token=user_api_key,
|
||||||
response_cost=response_cost,
|
user_id=user_id,
|
||||||
max_budget=end_user_max_budget,
|
end_user_id=end_user_id,
|
||||||
)
|
response_cost=response_cost,
|
||||||
else:
|
team_id=team_id,
|
||||||
raise Exception(
|
parent_otel_span=parent_otel_span,
|
||||||
"User API key and team id and user id missing from custom callback."
|
)
|
||||||
)
|
)
|
||||||
else:
|
|
||||||
if kwargs["stream"] is not True or (
|
await proxy_logging_obj.slack_alerting_instance.customer_spend_alert(
|
||||||
kwargs["stream"] is True and "complete_streaming_response" in kwargs
|
token=user_api_key,
|
||||||
):
|
key_alias=key_alias,
|
||||||
if sl_object is not None:
|
end_user_id=end_user_id,
|
||||||
cost_tracking_failure_debug_info: Union[dict, str] = (
|
response_cost=response_cost,
|
||||||
sl_object["response_cost_failure_debug_info"] # type: ignore
|
max_budget=end_user_max_budget,
|
||||||
or "response_cost_failure_debug_info is None in standard_logging_object"
|
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
cost_tracking_failure_debug_info = (
|
raise Exception(
|
||||||
"standard_logging_object not found"
|
"User API key and team id and user id missing from custom callback."
|
||||||
)
|
)
|
||||||
model = kwargs.get("model")
|
else:
|
||||||
raise Exception(
|
if kwargs["stream"] is not True or (
|
||||||
f"Cost tracking failed for model={model}.\nDebug info - {cost_tracking_failure_debug_info}\nAdd custom pricing - https://docs.litellm.ai/docs/proxy/custom_pricing"
|
kwargs["stream"] is True and "complete_streaming_response" in kwargs
|
||||||
|
):
|
||||||
|
if sl_object is not None:
|
||||||
|
cost_tracking_failure_debug_info: Union[dict, str] = (
|
||||||
|
sl_object["response_cost_failure_debug_info"] # type: ignore
|
||||||
|
or "response_cost_failure_debug_info is None in standard_logging_object"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
cost_tracking_failure_debug_info = (
|
||||||
|
"standard_logging_object not found"
|
||||||
|
)
|
||||||
|
model = kwargs.get("model")
|
||||||
|
raise Exception(
|
||||||
|
f"Cost tracking failed for model={model}.\nDebug info - {cost_tracking_failure_debug_info}\nAdd custom pricing - https://docs.litellm.ai/docs/proxy/custom_pricing"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
error_msg = f"Error in tracking cost callback - {str(e)}\n Traceback:{traceback.format_exc()}"
|
||||||
|
model = kwargs.get("model", "")
|
||||||
|
metadata = kwargs.get("litellm_params", {}).get("metadata", {})
|
||||||
|
error_msg += f"\n Args to _PROXY_track_cost_callback\n model: {model}\n metadata: {metadata}\n"
|
||||||
|
asyncio.create_task(
|
||||||
|
proxy_logging_obj.failed_tracking_alert(
|
||||||
|
error_message=error_msg,
|
||||||
|
failing_model=model,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
|
||||||
error_msg = f"Error in tracking cost callback - {str(e)}\n Traceback:{traceback.format_exc()}"
|
|
||||||
model = kwargs.get("model", "")
|
|
||||||
metadata = kwargs.get("litellm_params", {}).get("metadata", {})
|
|
||||||
error_msg += f"\n Args to _PROXY_track_cost_callback\n model: {model}\n metadata: {metadata}\n"
|
|
||||||
asyncio.create_task(
|
|
||||||
proxy_logging_obj.failed_tracking_alert(
|
|
||||||
error_message=error_msg,
|
|
||||||
failing_model=model,
|
|
||||||
)
|
)
|
||||||
)
|
verbose_proxy_logger.exception(
|
||||||
verbose_proxy_logger.exception("Error in tracking cost callback - %s", str(e))
|
"Error in tracking cost callback - %s", str(e)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _should_track_cost_callback(
|
def _should_track_cost_callback(
|
||||||
|
|
|
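The refactor above moves spend tracking onto a `CustomLogger` subclass (`_ProxyDBLogger`), so the success path and the new failure path share one object. A hedged sketch of the pattern with a simplified stand-in class, registered the way custom loggers are normally attached; it is not the real `_ProxyDBLogger`.

```python
# Simplified stand-in for the CustomLogger-based cost tracker.
import litellm
from litellm.integrations.custom_logger import CustomLogger

class MiniDBLogger(CustomLogger):
    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        # success path: would compute response_cost and write spend to the DB
        print("success:", kwargs.get("model"), kwargs.get("response_cost"))

    async def async_post_call_failure_hook(self, request_data, original_exception, user_api_key_dict):
        # failure path: the refactor also records failed requests (cost 0.0)
        # with error_information in the spend-log metadata
        print("failure:", type(original_exception).__name__)

litellm.callbacks = [MiniDBLogger()]
```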
@ -17,6 +17,7 @@ from litellm.proxy._types import (
|
||||||
TeamCallbackMetadata,
|
TeamCallbackMetadata,
|
||||||
UserAPIKeyAuth,
|
UserAPIKeyAuth,
|
||||||
)
|
)
|
||||||
|
from litellm.proxy.auth.route_checks import RouteChecks
|
||||||
from litellm.router import Router
|
from litellm.router import Router
|
||||||
from litellm.types.llms.anthropic import ANTHROPIC_API_HEADERS
|
from litellm.types.llms.anthropic import ANTHROPIC_API_HEADERS
|
||||||
from litellm.types.services import ServiceTypes
|
from litellm.types.services import ServiceTypes
|
||||||
|
@ -59,7 +60,7 @@ def _get_metadata_variable_name(request: Request) -> str:
|
||||||
|
|
||||||
For ALL other endpoints we call this "metadata
|
For ALL other endpoints we call this "metadata
|
||||||
"""
|
"""
|
||||||
if "thread" in request.url.path or "assistant" in request.url.path:
|
if RouteChecks._is_assistants_api_request(request):
|
||||||
return "litellm_metadata"
|
return "litellm_metadata"
|
||||||
if "batches" in request.url.path:
|
if "batches" in request.url.path:
|
||||||
return "litellm_metadata"
|
return "litellm_metadata"
|
||||||
|
@ -424,9 +425,9 @@ class LiteLLMProxyRequestSetup:
|
||||||
tags = [tag.strip() for tag in _tags]
|
tags = [tag.strip() for tag in _tags]
|
||||||
elif isinstance(headers["x-litellm-tags"], list):
|
elif isinstance(headers["x-litellm-tags"], list):
|
||||||
tags = headers["x-litellm-tags"]
|
tags = headers["x-litellm-tags"]
|
||||||
# Check request body for tags
|
# Check request body for tags
|
||||||
if "tags" in data and isinstance(data["tags"], list):
|
if "tags" in data and isinstance(data["tags"], list):
|
||||||
tags = data["tags"]
|
tags = data["tags"]
|
||||||
|
|
||||||
return tags
|
return tags
|
||||||
|
|
||||||
|
|
|
@ -127,7 +127,7 @@ async def new_user(
|
||||||
- user_role: Optional[str] - Specify a user role - "proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer", "team", "customer". Info about each role here: `https://github.com/BerriAI/litellm/litellm/proxy/_types.py#L20`
|
- user_role: Optional[str] - Specify a user role - "proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer", "team", "customer". Info about each role here: `https://github.com/BerriAI/litellm/litellm/proxy/_types.py#L20`
|
||||||
- max_budget: Optional[float] - Specify max budget for a given user.
|
- max_budget: Optional[float] - Specify max budget for a given user.
|
||||||
- budget_duration: Optional[str] - Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"), months ("1mo").
|
- budget_duration: Optional[str] - Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"), months ("1mo").
|
||||||
- models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)
|
- models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models). Set to ['no-default-models'] to block all model access. Restricting user to only team-based model access.
|
||||||
- tpm_limit: Optional[int] - Specify tpm limit for a given user (Tokens per minute)
|
- tpm_limit: Optional[int] - Specify tpm limit for a given user (Tokens per minute)
|
||||||
- rpm_limit: Optional[int] - Specify rpm limit for a given user (Requests per minute)
|
- rpm_limit: Optional[int] - Specify rpm limit for a given user (Requests per minute)
|
||||||
- auto_create_key: bool - Default=True. Flag used for returning a key as part of the /user/new response
|
- auto_create_key: bool - Default=True. Flag used for returning a key as part of the /user/new response
|
||||||
|
@ -753,6 +753,9 @@ async def get_users(
|
||||||
role: Optional[str] = fastapi.Query(
|
role: Optional[str] = fastapi.Query(
|
||||||
default=None, description="Filter users by role"
|
default=None, description="Filter users by role"
|
||||||
),
|
),
|
||||||
|
user_ids: Optional[str] = fastapi.Query(
|
||||||
|
default=None, description="Get list of users by user_ids"
|
||||||
|
),
|
||||||
page: int = fastapi.Query(default=1, ge=1, description="Page number"),
|
page: int = fastapi.Query(default=1, ge=1, description="Page number"),
|
||||||
page_size: int = fastapi.Query(
|
page_size: int = fastapi.Query(
|
||||||
default=25, ge=1, le=100, description="Number of items per page"
|
default=25, ge=1, le=100, description="Number of items per page"
|
||||||
|
@ -770,12 +773,19 @@ async def get_users(
|
||||||
- proxy_admin_viewer
|
- proxy_admin_viewer
|
||||||
- internal_user
|
- internal_user
|
||||||
- internal_user_viewer
|
- internal_user_viewer
|
||||||
|
user_ids: Optional[str]
|
||||||
|
Get list of users by user_ids. Comma separated list of user_ids.
|
||||||
page: int
|
page: int
|
||||||
The page number to return
|
The page number to return
|
||||||
page_size: int
|
page_size: int
|
||||||
The number of items per page
|
The number of items per page
|
||||||
|
|
||||||
Currently - admin-only endpoint.
|
Currently - admin-only endpoint.
|
||||||
|
|
||||||
|
Example curl:
|
||||||
|
```
|
||||||
|
http://0.0.0.0:4000/user/list?user_ids=default_user_id,693c1a4a-1cc0-4c7c-afe8-b5d2c8d52e17
|
||||||
|
```
|
||||||
"""
|
"""
|
||||||
from litellm.proxy.proxy_server import prisma_client
|
from litellm.proxy.proxy_server import prisma_client
|
||||||
|
|
||||||
|
@ -787,49 +797,69 @@ async def get_users(
|
||||||
|
|
||||||
# Calculate skip and take for pagination
|
# Calculate skip and take for pagination
|
||||||
skip = (page - 1) * page_size
|
skip = (page - 1) * page_size
|
||||||
take = page_size
|
|
||||||
|
|
||||||
# Prepare the query conditions
|
# Prepare the query conditions
|
||||||
where_clause = ""
|
# Build where conditions based on provided parameters
|
||||||
|
where_conditions: Dict[str, Any] = {}
|
||||||
|
|
||||||
if role:
|
if role:
|
||||||
where_clause = f"""WHERE "user_role" = '{role}'"""
|
where_conditions["user_role"] = {
|
||||||
|
"contains": role,
|
||||||
|
"mode": "insensitive", # Case-insensitive search
|
||||||
|
}
|
||||||
|
|
||||||
# Single optimized SQL query that gets both users and total count
|
if user_ids and isinstance(user_ids, str):
|
||||||
sql_query = f"""
|
user_id_list = [uid.strip() for uid in user_ids.split(",") if uid.strip()]
|
||||||
WITH total_users AS (
|
where_conditions["user_id"] = {
|
||||||
SELECT COUNT(*) AS total_number_internal_users
|
"in": user_id_list, # Now passing a list of strings as required by Prisma
|
||||||
FROM "LiteLLM_UserTable"
|
}
|
||||||
),
|
|
||||||
paginated_users AS (
|
users: Optional[List[LiteLLM_UserTable]] = (
|
||||||
SELECT
|
await prisma_client.db.litellm_usertable.find_many(
|
||||||
u.*,
|
where=where_conditions,
|
||||||
(
|
skip=skip,
|
||||||
SELECT COUNT(*)
|
take=page_size,
|
||||||
FROM "LiteLLM_VerificationToken" vt
|
order={"created_at": "desc"},
|
||||||
WHERE vt."user_id" = u."user_id"
|
)
|
||||||
) AS key_count
|
|
||||||
FROM "LiteLLM_UserTable" u
|
|
||||||
{where_clause}
|
|
||||||
LIMIT {take} OFFSET {skip}
|
|
||||||
)
|
)
|
||||||
SELECT
|
|
||||||
(SELECT total_number_internal_users FROM total_users),
|
|
||||||
*
|
|
||||||
FROM paginated_users;
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Execute the query
|
# Get total count of user rows
|
||||||
results = await prisma_client.db.query_raw(sql_query)
|
total_count = await prisma_client.db.litellm_usertable.count(
|
||||||
# Get total count from the first row (if results exist)
|
where=where_conditions # type: ignore
|
||||||
total_count = 0
|
)
|
||||||
if len(results) > 0:
|
|
||||||
total_count = results[0].get("total_number_internal_users")
|
# Get key count for each user
|
||||||
|
if users is not None:
|
||||||
|
user_keys = await prisma_client.db.litellm_verificationtoken.group_by(
|
||||||
|
by=["user_id"],
|
||||||
|
count={"user_id": True},
|
||||||
|
where={"user_id": {"in": [user.user_id for user in users]}},
|
||||||
|
)
|
||||||
|
user_key_counts = {
|
||||||
|
item["user_id"]: item["_count"]["user_id"] for item in user_keys
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
user_key_counts = {}
|
||||||
|
|
||||||
|
verbose_proxy_logger.debug(f"Total count of users: {total_count}")
|
||||||
|
|
||||||
# Calculate total pages
|
# Calculate total pages
|
||||||
total_pages = -(-total_count // page_size) # Ceiling division
|
total_pages = -(-total_count // page_size) # Ceiling division
|
||||||
|
|
||||||
|
# Prepare response
|
||||||
|
user_list: List[LiteLLM_UserTableWithKeyCount] = []
|
||||||
|
if users is not None:
|
||||||
|
for user in users:
|
||||||
|
user_list.append(
|
||||||
|
LiteLLM_UserTableWithKeyCount(
|
||||||
|
**user.model_dump(), key_count=user_key_counts.get(user.user_id, 0)
|
||||||
|
)
|
||||||
|
) # Return full key object
|
||||||
|
else:
|
||||||
|
user_list = []
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"users": results,
|
"users": user_list,
|
||||||
"total": total_count,
|
"total": total_count,
|
||||||
"page": page,
|
"page": page,
|
||||||
"page_size": page_size,
|
"page_size": page_size,
|
||||||
|
|
|
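With the rewrite above, `/user/list` accepts a comma-separated `user_ids` filter, matches `role` case-insensitively via Prisma, and returns a `key_count` per user. A hedged sketch of calling it; the proxy URL and master key are placeholders, and the `user_ids` value reuses the example from the docstring.

```python
# Query the updated /user/list endpoint with the new filters.
import requests

resp = requests.get(
    "http://localhost:4000/user/list",
    headers={"Authorization": "Bearer sk-master-key"},
    params={
        "user_ids": "default_user_id,693c1a4a-1cc0-4c7c-afe8-b5d2c8d52e17",
        "role": "internal_user",   # matched case-insensitively via Prisma `contains`
        "page": 1,
        "page_size": 25,
    },
)
for user in resp.json()["users"]:
    print(user["user_id"], user["key_count"])
```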
@ -518,6 +518,10 @@ async def generate_key_fn( # noqa: PLR0915
|
||||||
if "budget_duration" in data_json:
|
if "budget_duration" in data_json:
|
||||||
data_json["key_budget_duration"] = data_json.pop("budget_duration", None)
|
data_json["key_budget_duration"] = data_json.pop("budget_duration", None)
|
||||||
|
|
||||||
|
if user_api_key_dict.user_id is not None:
|
||||||
|
data_json["created_by"] = user_api_key_dict.user_id
|
||||||
|
data_json["updated_by"] = user_api_key_dict.user_id
|
||||||
|
|
||||||
# Set tags on the new key
|
# Set tags on the new key
|
||||||
if "tags" in data_json:
|
if "tags" in data_json:
|
||||||
from litellm.proxy.proxy_server import premium_user
|
from litellm.proxy.proxy_server import premium_user
|
||||||
|
@@ -1122,6 +1126,8 @@ async def generate_key_helper_fn(  # noqa: PLR0915
     organization_id: Optional[str] = None,
     table_name: Optional[Literal["key", "user"]] = None,
     send_invite_email: Optional[bool] = None,
+    created_by: Optional[str] = None,
+    updated_by: Optional[str] = None,
 ):
     from litellm.proxy.proxy_server import (
         litellm_proxy_budget_name,
@@ -1225,6 +1231,8 @@ async def generate_key_helper_fn(  # noqa: PLR0915
         "model_max_budget": model_max_budget_json,
         "budget_id": budget_id,
         "blocked": blocked,
+        "created_by": created_by,
+        "updated_by": updated_by,
     }
 
     if (
@@ -14,6 +14,7 @@ from fastapi import APIRouter, Depends, HTTPException, Request, Response
 import litellm
 from litellm.constants import BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES
 from litellm.proxy._types import *
+from litellm.proxy.auth.route_checks import RouteChecks
 from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
 from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
     create_pass_through_route,
@@ -397,7 +398,7 @@ async def azure_proxy_route(
     )
     # Add or update query parameters
     azure_api_key = passthrough_endpoint_router.get_credentials(
-        custom_llm_provider="azure",
+        custom_llm_provider=litellm.LlmProviders.AZURE.value,
         region_name=None,
     )
     if azure_api_key is None:
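Replacing the literal `"azure"` with `litellm.LlmProviders.AZURE.value` ties the provider name to the enum rather than a free-floating string. A stand-in sketch of the pattern; the `Provider` enum and `lookup_credential_env` below are illustrative only, not the real `litellm.LlmProviders` or the router's credential lookup.

```python
# Stand-in sketch; names here are illustrative only.
from enum import Enum


class Provider(str, Enum):
    OPENAI = "openai"
    AZURE = "azure"


def lookup_credential_env(custom_llm_provider: str) -> str:
    # Hypothetical lookup keyed by the provider's canonical name
    return {"azure": "AZURE_API_KEY", "openai": "OPENAI_API_KEY"}[custom_llm_provider]


# Passing the enum member's value instead of a hard-coded "azure" keeps every
# call site in sync with a single source of truth for provider names.
assert lookup_credential_env(custom_llm_provider=Provider.AZURE.value) == "AZURE_API_KEY"
```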
@@ -405,13 +406,14 @@
             "Required 'AZURE_API_KEY' in environment to make pass-through calls to Azure."
         )
 
-    return await _base_openai_pass_through_handler(
+    return await BaseOpenAIPassThroughHandler._base_openai_pass_through_handler(
         endpoint=endpoint,
         request=request,
         fastapi_response=fastapi_response,
         user_api_key_dict=user_api_key_dict,
         base_target_url=base_target_url,
         api_key=azure_api_key,
+        custom_llm_provider=litellm.LlmProviders.AZURE,
     )
 
 
@@ -431,10 +433,10 @@ async def openai_proxy_route(
 
 
     """
-    base_target_url = "https://api.openai.com"
+    base_target_url = "https://api.openai.com/"
     # Add or update query parameters
     openai_api_key = passthrough_endpoint_router.get_credentials(
-        custom_llm_provider="openai",
+        custom_llm_provider=litellm.LlmProviders.OPENAI.value,
         region_name=None,
     )
     if openai_api_key is None:
@@ -442,54 +444,113 @@
             "Required 'OPENAI_API_KEY' in environment to make pass-through calls to OpenAI."
         )
 
-    return await _base_openai_pass_through_handler(
+    return await BaseOpenAIPassThroughHandler._base_openai_pass_through_handler(
         endpoint=endpoint,
         request=request,
         fastapi_response=fastapi_response,
         user_api_key_dict=user_api_key_dict,
         base_target_url=base_target_url,
         api_key=openai_api_key,
+        custom_llm_provider=litellm.LlmProviders.OPENAI,
     )
 
 
-async def _base_openai_pass_through_handler(
-    endpoint: str,
-    request: Request,
-    fastapi_response: Response,
-    user_api_key_dict: UserAPIKeyAuth,
-    base_target_url: str,
-    api_key: str,
-):
-    encoded_endpoint = httpx.URL(endpoint).path
-
-    # Ensure endpoint starts with '/' for proper URL construction
-    if not encoded_endpoint.startswith("/"):
-        encoded_endpoint = "/" + encoded_endpoint
-
-    # Construct the full target URL using httpx
-    base_url = httpx.URL(base_target_url)
-    updated_url = base_url.copy_with(path=encoded_endpoint)
-
-    ## check for streaming
-    is_streaming_request = False
-    if "stream" in str(updated_url):
-        is_streaming_request = True
-
-    ## CREATE PASS-THROUGH
-    endpoint_func = create_pass_through_route(
-        endpoint=endpoint,
-        target=str(updated_url),
-        custom_headers={
+class BaseOpenAIPassThroughHandler:
+    @staticmethod
+    async def _base_openai_pass_through_handler(
+        endpoint: str,
+        request: Request,
+        fastapi_response: Response,
+        user_api_key_dict: UserAPIKeyAuth,
+        base_target_url: str,
+        api_key: str,
+        custom_llm_provider: litellm.LlmProviders,
+    ):
+        encoded_endpoint = httpx.URL(endpoint).path
+        # Ensure endpoint starts with '/' for proper URL construction
+        if not encoded_endpoint.startswith("/"):
+            encoded_endpoint = "/" + encoded_endpoint
+
+        # Construct the full target URL by properly joining the base URL and endpoint path
+        base_url = httpx.URL(base_target_url)
+        updated_url = BaseOpenAIPassThroughHandler._join_url_paths(
+            base_url=base_url,
+            path=encoded_endpoint,
+            custom_llm_provider=custom_llm_provider,
+        )
+
+        ## check for streaming
+        is_streaming_request = False
+        if "stream" in str(updated_url):
+            is_streaming_request = True
+
+        ## CREATE PASS-THROUGH
+        endpoint_func = create_pass_through_route(
+            endpoint=endpoint,
+            target=str(updated_url),
+            custom_headers=BaseOpenAIPassThroughHandler._assemble_headers(
+                api_key=api_key, request=request
+            ),
+        )  # dynamically construct pass-through endpoint based on incoming path
+        received_value = await endpoint_func(
+            request,
+            fastapi_response,
+            user_api_key_dict,
+            stream=is_streaming_request,  # type: ignore
+            query_params=dict(request.query_params),  # type: ignore
+        )
+
+        return received_value
+
+    @staticmethod
+    def _append_openai_beta_header(headers: dict, request: Request) -> dict:
+        """
+        Appends the OpenAI-Beta header to the headers if the request is an OpenAI Assistants API request
+        """
+        if (
+            RouteChecks._is_assistants_api_request(request) is True
+            and "OpenAI-Beta" not in headers
+        ):
+            headers["OpenAI-Beta"] = "assistants=v2"
+        return headers
+
+    @staticmethod
+    def _assemble_headers(api_key: str, request: Request) -> dict:
+        base_headers = {
             "authorization": "Bearer {}".format(api_key),
             "api-key": "{}".format(api_key),
-        },
-    )  # dynamically construct pass-through endpoint based on incoming path
-    received_value = await endpoint_func(
-        request,
-        fastapi_response,
-        user_api_key_dict,
-        stream=is_streaming_request,  # type: ignore
-        query_params=dict(request.query_params),  # type: ignore
-    )
-
-    return received_value
+        }
+        return BaseOpenAIPassThroughHandler._append_openai_beta_header(
+            headers=base_headers,
+            request=request,
+        )
+
+    @staticmethod
+    def _join_url_paths(
+        base_url: httpx.URL, path: str, custom_llm_provider: litellm.LlmProviders
+    ) -> str:
+        """
+        Properly joins a base URL with a path, preserving any existing path in the base URL.
+        """
+        # Join paths correctly by removing trailing/leading slashes as needed
+        if not base_url.path or base_url.path == "/":
+            # If base URL has no path, just use the new path
+            joined_path_str = str(base_url.copy_with(path=path))
+        else:
+            # Otherwise, combine the paths
+            base_path = base_url.path.rstrip("/")
+            clean_path = path.lstrip("/")
+            full_path = f"{base_path}/{clean_path}"
+            joined_path_str = str(base_url.copy_with(path=full_path))
+
+        # Apply OpenAI-specific path handling for both branches
+        if (
+            custom_llm_provider == litellm.LlmProviders.OPENAI
+            and "/v1/" not in joined_path_str
+        ):
+            # Insert v1 after api.openai.com for OpenAI requests
+            joined_path_str = joined_path_str.replace(
+                "api.openai.com/", "api.openai.com/v1/"
+            )
+
+        return joined_path_str
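To see why `openai_proxy_route` now uses a trailing slash on the base URL and routes through `_join_url_paths`, here is a standalone sketch of the same joining logic using only `httpx`. The `join_url_paths` helper below mirrors the method above but is not the proxy code itself.

```python
# Standalone sketch of the URL-joining behaviour introduced above (assumes httpx is installed).
import httpx


def join_url_paths(base_target_url: str, endpoint_path: str, is_openai: bool) -> str:
    base_url = httpx.URL(base_target_url)
    if not base_url.path or base_url.path == "/":
        # Base URL has no meaningful path: just set the endpoint path on it
        joined = str(base_url.copy_with(path=endpoint_path))
    else:
        # Base URL already carries a path prefix: append the endpoint to it
        full_path = f"{base_url.path.rstrip('/')}/{endpoint_path.lstrip('/')}"
        joined = str(base_url.copy_with(path=full_path))

    # OpenAI requests get pinned to the /v1/ API surface if the caller omitted it
    if is_openai and "/v1/" not in joined:
        joined = joined.replace("api.openai.com/", "api.openai.com/v1/")
    return joined


# "https://api.openai.com/" has path "/", so the first branch applies and the
# /v1/ segment is inserted:
print(join_url_paths("https://api.openai.com/", "/chat/completions", is_openai=True))
# https://api.openai.com/v1/chat/completions
```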
Some files were not shown because too many files have changed in this diff.