Merge branch 'BerriAI:main' into main
@@ -1935,12 +1935,12 @@ jobs:
pip install prisma
pip install fastapi
pip install jsonschema
pip install "httpx==0.24.1"
pip install "httpx==0.27.0"
pip install "anyio==3.7.1"
pip install "asyncio==3.4.3"
pip install "PyGithub==1.59.1"
pip install "google-cloud-aiplatform==1.59.0"
pip install "anthropic==0.21.3"
pip install "anthropic==0.49.0"
# Run pytest and generate JUnit XML report
- run:
    name: Build Docker image
Makefile (new file, 21 lines)
@@ -0,0 +1,21 @@
# LiteLLM Makefile
# Simple Makefile for running tests and basic development tasks

.PHONY: help test test-unit test-integration

# Default target
help:
	@echo "Available commands:"
	@echo "  make test             - Run all tests"
	@echo "  make test-unit        - Run unit tests"
	@echo "  make test-integration - Run integration tests"

# Testing
test:
	poetry run pytest tests/

test-unit:
	poetry run pytest tests/litellm/

test-integration:
	poetry run pytest tests/ -k "not litellm"
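For example, to run only the unit tests defined above:

```bash
make test-unit   # equivalent to: poetry run pytest tests/litellm/
```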
docs/my-website/docs/anthropic_unified.md (new file, 92 lines)
@@ -0,0 +1,92 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# [BETA] `/v1/messages`

LiteLLM provides a BETA endpoint that follows the spec of Anthropic's `/v1/messages` endpoint.

This currently just supports the Anthropic API.

| Feature | Supported | Notes |
|-------|-------|-------|
| Cost Tracking | ✅ | |
| Logging | ✅ | works across all integrations |
| End-user Tracking | ✅ | |
| Streaming | ✅ | |
| Fallbacks | ✅ | between anthropic models |
| Loadbalancing | ✅ | between anthropic models |

Planned improvements:
- Vertex AI Anthropic support
- Bedrock Anthropic support

## Usage

<Tabs>
<TabItem label="PROXY" value="proxy">

1. Setup config.yaml

```yaml
model_list:
  - model_name: anthropic-claude
    litellm_params:
      model: claude-3-7-sonnet-latest
```

2. Start proxy

```bash
litellm --config /path/to/config.yaml
```

3. Test it!

```bash
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
-H 'content-type: application/json' \
-H "x-api-key: $LITELLM_API_KEY" \
-H 'anthropic-version: 2023-06-01' \
-d '{
  "model": "anthropic-claude",
  "messages": [
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": "List 5 important events in the XIX century"
        }
      ]
    }
  ],
  "max_tokens": 4096
}'
```
</TabItem>
<TabItem value="sdk" label="SDK">

```python
from litellm.llms.anthropic.experimental_pass_through.messages.handler import anthropic_messages
import asyncio
import os

# set env
os.environ["ANTHROPIC_API_KEY"] = "my-api-key"

messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}]

# Call the handler
async def call():
    response = await anthropic_messages(
        messages=messages,
        api_key=os.environ["ANTHROPIC_API_KEY"],
        model="claude-3-haiku-20240307",
        max_tokens=100,
    )

asyncio.run(call())
```

</TabItem>
</Tabs>
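Since streaming is listed as supported above, the same proxy call can be made streaming by adding `"stream": true` to the request body. This follows the standard Anthropic `/v1/messages` spec; the sketch below reuses the `anthropic-claude` model name from the config above:

```bash
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
-H 'content-type: application/json' \
-H "x-api-key: $LITELLM_API_KEY" \
-H 'anthropic-version: 2023-06-01' \
-d '{
  "model": "anthropic-claude",
  "stream": true,
  "max_tokens": 256,
  "messages": [
    {"role": "user", "content": "Tell me a short joke"}
  ]
}'
```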
|
@ -190,3 +190,137 @@ Expected Response
|
|||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
|
||||
## Explicitly specify image type

If you have images without a mime-type, or if litellm is incorrectly inferring the mime type of your image (e.g. calling `gs://` URLs with vertex ai), you can set this explicitly via the `format` param.

```python
"image_url": {
    "url": "gs://my-gs-image",
    "format": "image/jpeg"
}
```

LiteLLM will use this for any API endpoint that supports specifying a mime-type (e.g. anthropic/bedrock/vertex ai).

For others (e.g. openai), it will be ignored.
|
||||
|
||||
<Tabs>
|
||||
<TabItem label="SDK" value="sdk">
|
||||
|
||||
```python
|
||||
import os
|
||||
from litellm import completion
|
||||
|
||||
os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
|
||||
|
||||
# openai call
|
||||
response = completion(
|
||||
model = "claude-3-7-sonnet-latest",
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "What’s in this image?"
|
||||
},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
|
||||
"format": "image/jpeg"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem label="PROXY" value="proxy">
|
||||
|
||||
1. Define vision models on config.yaml
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: gpt-4-vision-preview # OpenAI gpt-4-vision-preview
|
||||
litellm_params:
|
||||
model: openai/gpt-4-vision-preview
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
- model_name: llava-hf # Custom OpenAI compatible model
|
||||
litellm_params:
|
||||
model: openai/llava-hf/llava-v1.6-vicuna-7b-hf
|
||||
api_base: http://localhost:8000
|
||||
api_key: fake-key
|
||||
model_info:
|
||||
supports_vision: True # set supports_vision to True so /model/info returns this attribute as True
|
||||
|
||||
```
|
||||
|
||||
2. Run proxy server
|
||||
|
||||
```bash
|
||||
litellm --config config.yaml
|
||||
```
|
||||
|
||||
3. Test it using the OpenAI Python SDK
|
||||
|
||||
|
||||
```python
|
||||
import os
|
||||
from openai import OpenAI
|
||||
|
||||
client = OpenAI(
|
||||
api_key="sk-1234", # your litellm proxy api key
|
||||
)
|
||||
|
||||
response = client.chat.completions.create(
|
||||
model = "gpt-4-vision-preview", # use model="llava-hf" to test your custom OpenAI endpoint
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "What’s in this image?"
|
||||
},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
|
||||
"format": "image/jpeg"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
|
||||
|
||||
## Spec
|
||||
|
||||
```
|
||||
"image_url": str
|
||||
|
||||
OR
|
||||
|
||||
"image_url": {
|
||||
"url": "url OR base64 encoded str",
|
||||
"detail": "openai-only param",
|
||||
"format": "specify mime-type of image"
|
||||
}
|
||||
```
|
|
@ -46,7 +46,7 @@ For security inquiries, please contact us at support@berri.ai
|
|||
|-------------------|-------------------------------------------------------------------------------------------------|
|
||||
| SOC 2 Type I | Certified. Report available upon request on Enterprise plan. |
|
||||
| SOC 2 Type II | In progress. Certificate available by April 15th, 2025 |
|
||||
| ISO27001 | In progress. Certificate available by February 7th, 2025 |
|
||||
| ISO 27001 | Certified. Report available upon request on Enterprise |
|
||||
|
||||
|
||||
## Supported Data Regions for LiteLLM Cloud
|
||||
|
@ -137,7 +137,7 @@ Point of contact email address for general security-related questions: krrish@be
|
|||
Has the Vendor been audited / certified?
|
||||
- SOC 2 Type I. Certified. Report available upon request on Enterprise plan.
|
||||
- SOC 2 Type II. In progress. Certificate available by April 15th, 2025.
|
||||
- ISO27001. In progress. Certificate available by February 7th, 2025.
|
||||
- ISO 27001. Certified. Report available upon request on Enterprise plan.
|
||||
|
||||
Has an information security management system been implemented?
|
||||
- Yes - [CodeQL](https://codeql.github.com/) and a comprehensive ISMS covering multiple security domains.
|
||||
|
|
5
docs/my-website/docs/projects/PDL.md
Normal file
|
@ -0,0 +1,5 @@
|
|||
PDL - A YAML-based approach to prompt programming
|
||||
|
||||
Github: https://github.com/IBM/prompt-declaration-language
|
||||
|
||||
PDL is a declarative approach to prompt programming, helping users to accumulate messages implicitly, with support for model chaining and tool use.
|
9
docs/my-website/docs/projects/pgai.md
Normal file
|
@ -0,0 +1,9 @@
|
|||
# pgai
|
||||
|
||||
[pgai](https://github.com/timescale/pgai) is a suite of tools to develop RAG, semantic search, and other AI applications more easily with PostgreSQL.
|
||||
|
||||
If you don't know what pgai is yet, check out the [README](https://github.com/timescale/pgai)!

If you're already familiar with pgai, you can find litellm-specific docs here:
|
||||
- Litellm for [model calling](https://github.com/timescale/pgai/blob/main/docs/model_calling/litellm.md) in pgai
|
||||
- Use the [litellm provider](https://github.com/timescale/pgai/blob/main/docs/vectorizer/api-reference.md#aiembedding_litellm) to automatically create embeddings for your data via the pgai vectorizer.
|
|
@ -286,9 +286,12 @@ print(response)
|
|||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
## Usage - Function Calling
|
||||
## Usage - Function Calling / Tool calling
|
||||
|
||||
LiteLLM uses Bedrock's Converse API for making tool calls
|
||||
LiteLLM supports tool calling via Bedrock's Converse and Invoke APIs.
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="sdk" label="SDK">
|
||||
|
||||
```python
|
||||
from litellm import completion
|
||||
|
@ -333,6 +336,69 @@ assert isinstance(
|
|||
response.choices[0].message.tool_calls[0].function.arguments, str
|
||||
)
|
||||
```
|
||||
</TabItem>
|
||||
<TabItem value="proxy" label="PROXY">
|
||||
|
||||
1. Setup config.yaml
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: bedrock-claude-3-7
|
||||
litellm_params:
|
||||
model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0 # for bedrock invoke, specify `bedrock/invoke/<model>`
|
||||
```
|
||||
|
||||
2. Start proxy
|
||||
|
||||
```bash
|
||||
litellm --config /path/to/config.yaml
|
||||
```
|
||||
|
||||
3. Test it!
|
||||
|
||||
```bash
|
||||
curl http://0.0.0.0:4000/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer $LITELLM_API_KEY" \
|
||||
-d '{
|
||||
"model": "bedrock-claude-3-7",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What'\''s the weather like in Boston today?"
|
||||
}
|
||||
],
|
||||
"tools": [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA"
|
||||
},
|
||||
"unit": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"]
|
||||
}
|
||||
},
|
||||
"required": ["location"]
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"tool_choice": "auto"
|
||||
}'
|
||||
|
||||
```
|
||||
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
|
||||
## Usage - Vision
|
||||
|
@ -390,9 +456,9 @@ Returns 2 new fields in `message` and `delta` object:
|
|||
Each object has the following fields:
|
||||
- `type` - Literal["thinking"] - The type of thinking block
|
||||
- `thinking` - string - The thinking of the response. Also returned in `reasoning_content`
|
||||
- `signature_delta` - string - A base64 encoded string, returned by Anthropic.
|
||||
- `signature` - string - A base64 encoded string, returned by Anthropic.
|
||||
|
||||
The `signature_delta` is required by Anthropic on subsequent calls, if 'thinking' content is passed in (only required to use `thinking` with tool calling). [Learn more](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#understanding-thinking-blocks)
|
||||
The `signature` is required by Anthropic on subsequent calls, if 'thinking' content is passed in (only required to use `thinking` with tool calling). [Learn more](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#understanding-thinking-blocks)
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="sdk" label="SDK">
|
||||
|
@ -475,7 +541,7 @@ Same as [Anthropic API response](../providers/anthropic#usage---thinking--reason
|
|||
{
|
||||
"type": "thinking",
|
||||
"thinking": "The capital of France is Paris. This is a straightforward factual question.",
|
||||
"signature_delta": "EqoBCkgIARABGAIiQL2UoU0b1OHYi+yCHpBY7U6FQW8/FcoLewocJQPa2HnmLM+NECy50y44F/kD4SULFXi57buI9fAvyBwtyjlOiO0SDE3+r3spdg6PLOo9PBoMma2ku5OTAoR46j9VIjDRlvNmBvff7YW4WI9oU8XagaOBSxLPxElrhyuxppEn7m6bfT40dqBSTDrfiw4FYB4qEPETTI6TA6wtjGAAqmFqKTo="
|
||||
"signature": "EqoBCkgIARABGAIiQL2UoU0b1OHYi+yCHpBY7U6FQW8/FcoLewocJQPa2HnmLM+NECy50y44F/kD4SULFXi57buI9fAvyBwtyjlOiO0SDE3+r3spdg6PLOo9PBoMma2ku5OTAoR46j9VIjDRlvNmBvff7YW4WI9oU8XagaOBSxLPxElrhyuxppEn7m6bfT40dqBSTDrfiw4FYB4qEPETTI6TA6wtjGAAqmFqKTo="
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -492,6 +558,111 @@ Same as [Anthropic API response](../providers/anthropic#usage---thinking--reason
|
|||
```
|
||||
|
||||
|
||||
## Usage - Structured Output / JSON mode
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="sdk" label="SDK">
|
||||
|
||||
```python
|
||||
from litellm import completion
|
||||
import os
|
||||
from pydantic import BaseModel
|
||||
|
||||
# set env
|
||||
os.environ["AWS_ACCESS_KEY_ID"] = ""
|
||||
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
|
||||
os.environ["AWS_REGION_NAME"] = ""
|
||||
|
||||
class CalendarEvent(BaseModel):
|
||||
name: str
|
||||
date: str
|
||||
participants: list[str]
|
||||
|
||||
class EventsList(BaseModel):
|
||||
events: list[CalendarEvent]
|
||||
|
||||
response = completion(
|
||||
model="bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0", # specify invoke via `bedrock/invoke/anthropic.claude-3-7-sonnet-20250219-v1:0`
|
||||
response_format=EventsList,
|
||||
messages=[
|
||||
{"role": "system", "content": "You are a helpful assistant designed to output JSON."},
|
||||
{"role": "user", "content": "Who won the world series in 2020?"}
|
||||
],
|
||||
)
|
||||
print(response.choices[0].message.content)
|
||||
```
|
||||
</TabItem>
|
||||
<TabItem value="proxy" label="PROXY">
|
||||
|
||||
1. Setup config.yaml
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: bedrock-claude-3-7
|
||||
litellm_params:
|
||||
model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0 # specify invoke via `bedrock/invoke/<model_name>`
|
||||
aws_access_key_id: os.environ/CUSTOM_AWS_ACCESS_KEY_ID
|
||||
aws_secret_access_key: os.environ/CUSTOM_AWS_SECRET_ACCESS_KEY
|
||||
aws_region_name: os.environ/CUSTOM_AWS_REGION_NAME
|
||||
```
|
||||
|
||||
2. Start proxy
|
||||
|
||||
```bash
|
||||
litellm --config /path/to/config.yaml
|
||||
```
|
||||
|
||||
3. Test it!
|
||||
|
||||
```bash
|
||||
curl http://0.0.0.0:4000/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer $LITELLM_KEY" \
|
||||
-d '{
|
||||
"model": "bedrock-claude-3-7",
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful assistant designed to output JSON."
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Who won the worlde series in 2020?"
|
||||
}
|
||||
],
|
||||
"response_format": {
|
||||
"type": "json_schema",
|
||||
"json_schema": {
|
||||
"name": "math_reasoning",
|
||||
"description": "reason about maths",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"steps": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"explanation": { "type": "string" },
|
||||
"output": { "type": "string" }
|
||||
},
|
||||
"required": ["explanation", "output"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"final_answer": { "type": "string" }
|
||||
},
|
||||
"required": ["steps", "final_answer"],
|
||||
"additionalProperties": false
|
||||
},
|
||||
"strict": true
|
||||
}
|
||||
}
|
||||
}'
|
||||
```
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
## Usage - Bedrock Guardrails
|
||||
|
||||
Example of using [Bedrock Guardrails with LiteLLM](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-use-converse-api.html)
|
||||
|
|
|
@ -1686,6 +1686,14 @@ assert isinstance(
|
|||
|
||||
Pass any file supported by Vertex AI, through LiteLLM.
|
||||
|
||||
LiteLLM supports the following media inputs:
|
||||
|
||||
```
|
||||
Images with Cloud Storage URIs - gs://cloud-samples-data/generative-ai/image/boats.jpeg
|
||||
Images with direct links - https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg
|
||||
Videos with Cloud Storage URIs - https://storage.googleapis.com/github-repo/img/gemini/multimodality_usecases_overview/pixel8.mp4
|
||||
Base64 Encoded Local Images
|
||||
```
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="sdk" label="SDK">
|
||||
|
|
|
@ -46,18 +46,17 @@ You can see the full DB Schema [here](https://github.com/BerriAI/litellm/blob/ma
|
|||
|
||||
| Table Name | Description | Row Insert Frequency |
|
||||
|------------|-------------|---------------------|
|
||||
| LiteLLM_SpendLogs | Detailed logs of all API requests. Records token usage, spend, and timing information. Tracks which models and keys were used. | **High - every LLM API request** |
|
||||
| LiteLLM_ErrorLogs | Captures failed requests and errors. Stores exception details and request information. Helps with debugging and monitoring. | **Medium - on errors only** |
|
||||
| LiteLLM_SpendLogs | Detailed logs of all API requests. Records token usage, spend, and timing information. Tracks which models and keys were used. | **High - every LLM API request - Success or Failure** |
|
||||
| LiteLLM_AuditLog | Tracks changes to system configuration. Records who made changes and what was modified. Maintains history of updates to teams, users, and models. | **Off by default**, **High - when enabled** |
|
||||
|
||||
## Disable `LiteLLM_SpendLogs` & `LiteLLM_ErrorLogs`
|
||||
## Disable `LiteLLM_SpendLogs`
|
||||
|
||||
You can disable spend_logs and error_logs by setting `disable_spend_logs` and `disable_error_logs` to `True` on the `general_settings` section of your proxy_config.yaml file.
|
||||
|
||||
```yaml
|
||||
general_settings:
|
||||
disable_spend_logs: True # Disable writing spend logs to DB
|
||||
disable_error_logs: True # Disable writing error logs to DB
|
||||
disable_error_logs: True # Only disable writing error logs to DB, regular spend logs will still be written unless `disable_spend_logs: True`
|
||||
```
|
||||
|
||||
### What is the impact of disabling these logs?
|
||||
|
|
|
@ -78,6 +78,7 @@ Inherits from `StandardLoggingUserAPIKeyMetadata` and adds:
|
|||
| `api_base` | `Optional[str]` | Optional API base URL |
|
||||
| `response_cost` | `Optional[str]` | Optional response cost |
|
||||
| `additional_headers` | `Optional[StandardLoggingAdditionalHeaders]` | Additional headers |
|
||||
| `batch_models` | `Optional[List[str]]` | Only set for Batches API. Lists the models used for cost calculation |
|
||||
|
||||
## StandardLoggingModelInformation
|
||||
|
||||
|
|
docs/my-website/docs/proxy/master_key_rotations.md (new file, 53 lines)
@@ -0,0 +1,53 @@
# Rotating Master Key

Here are our recommended steps for rotating your master key.

**1. Backup your DB**

In case of any errors during the encryption/decryption process, this will allow you to revert to the current state without issues.

**2. Call `/key/regenerate` with the new master key**

```bash
curl -L -X POST 'http://localhost:4000/key/regenerate' \
-H 'Authorization: Bearer sk-1234' \
-H 'Content-Type: application/json' \
-d '{
    "key": "sk-1234",
    "new_master_key": "sk-PIp1h0RekR"
}'
```

This will re-encrypt any models in your Proxy_ModelTable with the new master key.

Expect to start seeing decryption errors in logs, as your old master key is no longer able to decrypt the new values.

```bash
raise Exception("Unable to decrypt value={}".format(v))
Exception: Unable to decrypt value=<new-encrypted-value>
```

**3. Update LITELLM_MASTER_KEY**

In your environment variables, update the value of `LITELLM_MASTER_KEY` to the `new_master_key` from Step 2.

This ensures the key used for decryption from the DB is the new key.
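For example, if the proxy reads its environment from a shell or an env file, Step 3 is just swapping in the value you passed as `new_master_key` in Step 2 and restarting the proxy:

```bash
# value below is the example new_master_key from Step 2
export LITELLM_MASTER_KEY="sk-PIp1h0RekR"
```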
**4. Test it**

Make a test request to a model stored on the proxy with a LiteLLM key (the new master key or a virtual key) and see if it works.

```bash
curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
    "model": "gpt-4o-mini", # 👈 REPLACE with the public model name of any db-model
    "messages": [
        {
            "content": "Hey, how'\''s it going",
            "role": "user"
        }
    ]
}'
```
|
|
@ -107,9 +107,9 @@ general_settings:
|
|||
|
||||
By default, LiteLLM writes several types of logs to the database:
|
||||
- Every LLM API request to the `LiteLLM_SpendLogs` table
|
||||
- LLM Exceptions to the `LiteLLM_LogsErrors` table
|
||||
- LLM Exceptions to the `LiteLLM_SpendLogs` table
|
||||
|
||||
If you're not viewing these logs on the LiteLLM UI (most users use Prometheus for monitoring), you can disable them by setting the following flags to `True`:
|
||||
If you're not viewing these logs on the LiteLLM UI, you can disable them by setting the following flags to `True`:
|
||||
|
||||
```yaml
|
||||
general_settings:
|
||||
|
|
|
@ -4,7 +4,7 @@ Litellm Proxy has the following release cycle:
|
|||
|
||||
- `v1.x.x-nightly`: These are releases which pass ci/cd.
|
||||
- `v1.x.x.rc`: These are releases which pass ci/cd + [manual review](https://github.com/BerriAI/litellm/discussions/8495#discussioncomment-12180711).
|
||||
- `v1.x.x`: These are releases which pass ci/cd + manual review + 3 days of production testing.
|
||||
- `v1.x.x` OR `v1.x.x-stable`: These are releases which pass ci/cd + manual review + 3 days of production testing.
|
||||
|
||||
In production, we recommend using the latest `v1.x.x` release.
|
||||
|
||||
|
|
|
@ -17,7 +17,7 @@ Supported Providers:
|
|||
{
|
||||
"type": "thinking",
|
||||
"thinking": "The capital of France is Paris.",
|
||||
"signature_delta": "EqoBCkgIARABGAIiQL2UoU0b1OHYi+..."
|
||||
"signature": "EqoBCkgIARABGAIiQL2UoU0b1OHYi+..."
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@ -292,7 +292,7 @@ curl http://0.0.0.0:4000/v1/chat/completions \
|
|||
{
|
||||
"type": "thinking",
|
||||
"thinking": "The user is asking for the current weather in three different locations: San Francisco, Tokyo, and Paris. I have access to the `get_current_weather` function that can provide this information.\n\nThe function requires a `location` parameter, and has an optional `unit` parameter. The user hasn't specified which unit they prefer (celsius or fahrenheit), so I'll use the default provided by the function.\n\nI need to make three separate function calls, one for each location:\n1. San Francisco\n2. Tokyo\n3. Paris\n\nThen I'll compile the results into a response with three distinct weather reports as requested by the user.",
|
||||
"signature_delta": "EqoBCkgIARABGAIiQCkBXENoyB+HstUOs/iGjG+bvDbIQRrxPsPpOSt5yDxX6iulZ/4K/w9Rt4J5Nb2+3XUYsyOH+CpZMfADYvItFR4SDPb7CmzoGKoolCMAJRoM62p1ZRASZhrD3swqIjAVY7vOAFWKZyPEJglfX/60+bJphN9W1wXR6rWrqn3MwUbQ5Mb/pnpeb10HMploRgUqEGKOd6fRKTkUoNDuAnPb55c="
|
||||
"signature": "EqoBCkgIARABGAIiQCkBXENoyB+HstUOs/iGjG+bvDbIQRrxPsPpOSt5yDxX6iulZ/4K/w9Rt4J5Nb2+3XUYsyOH+CpZMfADYvItFR4SDPb7CmzoGKoolCMAJRoM62p1ZRASZhrD3swqIjAVY7vOAFWKZyPEJglfX/60+bJphN9W1wXR6rWrqn3MwUbQ5Mb/pnpeb10HMploRgUqEGKOd6fRKTkUoNDuAnPb55c="
|
||||
}
|
||||
],
|
||||
"provider_specific_fields": {
|
||||
|
@ -353,5 +353,5 @@ These fields can be accessed via `response.choices[0].message.reasoning_content`
|
|||
- `thinking_blocks` - Optional[List[Dict[str, str]]]: A list of thinking blocks from the model. Only returned for Anthropic models.
|
||||
- `type` - str: The type of thinking block.
|
||||
- `thinking` - str: The thinking from the model.
|
||||
- `signature_delta` - str: The signature delta from the model.
|
||||
- `signature` - str: The signature from the model.
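A short sketch of reading these fields off a completion response (assumes `ANTHROPIC_API_KEY` is set and a model that returns thinking blocks; the exact model name here is illustrative):

```python
from litellm import completion

response = completion(
    model="anthropic/claude-3-7-sonnet-latest",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    thinking={"type": "enabled", "budget_tokens": 1024},  # enable extended thinking
    max_tokens=2048,
)

message = response.choices[0].message
print(message.reasoning_content)                 # plain-text reasoning
for block in message.thinking_blocks or []:      # dicts with "type", "thinking", "signature"
    print(block["type"], block["signature"][:20])
```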
@@ -952,8 +952,8 @@ router_settings:
```

Defaults:
- allowed_fails: 3 (previously documented as 0)
- cooldown_time: 5s (`DEFAULT_COOLDOWN_TIME_SECONDS` in constants.py; previously documented as 60s)
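For reference, a minimal sketch of pinning these values explicitly in the proxy config — the keys follow the `router_settings` block shown in the hunk above; the values are just the documented defaults:

```yaml
router_settings:
  allowed_fails: 3   # failures tolerated before a deployment is cooled down
  cooldown_time: 5   # cooldown period, in seconds
```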
**Set Per Model**
|
||||
|
||||
|
|
|
@ -96,6 +96,33 @@ litellm --config /path/to/config.yaml
|
|||
```
|
||||
|
||||
|
||||
### Using K/V pairs in 1 AWS Secret
|
||||
|
||||
You can read multiple keys from a single AWS Secret using the `primary_secret_name` parameter:
|
||||
|
||||
```yaml
|
||||
general_settings:
|
||||
key_management_system: "aws_secret_manager"
|
||||
key_management_settings:
|
||||
hosted_keys: [
|
||||
"OPENAI_API_KEY_MODEL_1",
|
||||
"OPENAI_API_KEY_MODEL_2",
|
||||
]
|
||||
primary_secret_name: "litellm_secrets" # 👈 Read multiple keys from one JSON secret
|
||||
```
|
||||
|
||||
The `primary_secret_name` allows you to read multiple keys from a single AWS Secret as a JSON object. For example, the "litellm_secrets" secret would contain:
|
||||
|
||||
```json
|
||||
{
|
||||
"OPENAI_API_KEY_MODEL_1": "sk-key1...",
|
||||
"OPENAI_API_KEY_MODEL_2": "sk-key2..."
|
||||
}
|
||||
```
|
||||
|
||||
This reduces the number of AWS Secrets you need to manage.
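For reference, one way to create such a JSON secret is via the AWS CLI — the secret name and key names here simply mirror the example above:

```bash
aws secretsmanager create-secret \
  --name litellm_secrets \
  --secret-string '{"OPENAI_API_KEY_MODEL_1": "sk-key1...", "OPENAI_API_KEY_MODEL_2": "sk-key2..."}'
```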
## Hashicorp Vault
|
||||
|
||||
|
||||
|
@ -353,4 +380,7 @@ general_settings:
|
|||
|
||||
# Hosted Keys Settings
|
||||
hosted_keys: ["litellm_master_key"] # OPTIONAL. Specify which env keys you stored on AWS
|
||||
|
||||
# K/V pairs in 1 AWS Secret Settings
|
||||
primary_secret_name: "litellm_secrets" # OPTIONAL. Read multiple keys from one JSON secret on AWS Secret Manager
|
||||
```
|
|
@ -2,9 +2,9 @@ import Image from '@theme/IdealImage';
|
|||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
|
||||
# Use LiteLLM AI Gateway with Aporia Guardrails
|
||||
# Aporia Guardrails with LiteLLM Gateway
|
||||
|
||||
In this tutorial we will use LiteLLM Proxy with Aporia to detect PII in requests and profanity in responses
|
||||
In this tutorial we will use LiteLLM AI Gateway with Aporia to detect PII in requests and profanity in responses
|
||||
|
||||
## 1. Setup guardrails on Aporia
|
||||
|
||||
|
|
103
docs/my-website/docs/tutorials/openweb_ui.md
Normal file
|
@ -0,0 +1,103 @@
|
|||
import Image from '@theme/IdealImage';
|
||||
import Tabs from '@theme/Tabs';
|
||||
import TabItem from '@theme/TabItem';
|
||||
|
||||
# OpenWeb UI with LiteLLM
|
||||
|
||||
This guide walks you through connecting OpenWeb UI to LiteLLM. Using LiteLLM with OpenWeb UI allows teams to:
- Access 100+ LLMs on OpenWeb UI
- Track Spend / Usage, Set Budget Limits
- Send Request/Response Logs to logging destinations like Langfuse, S3, GCS buckets, etc.
- Set access controls, e.g. control which models OpenWebUI can access.
|
||||
|
||||
## Quickstart

- Make sure to set up LiteLLM with the [LiteLLM Getting Started Guide](https://docs.litellm.ai/docs/proxy/docker_quick_start)


## 1. Start LiteLLM & OpenWebUI

- OpenWebUI starts running on [http://localhost:3000](http://localhost:3000)
- LiteLLM starts running on [http://localhost:4000](http://localhost:4000)
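A minimal sketch of starting both with Docker, assuming the publicly documented images and default ports (the image tags, port mappings, and config path below are assumptions — check each project's install docs if they have changed):

```bash
# LiteLLM Proxy on port 4000 (assumes a config.yaml in the current directory)
docker run -d -p 4000:4000 \
  -v $(pwd)/config.yaml:/app/config.yaml \
  ghcr.io/berriai/litellm:main-latest --config /app/config.yaml

# OpenWebUI on port 3000 (container listens on 8080 internally)
docker run -d -p 3000:8080 ghcr.io/open-webui/open-webui:main
```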
## 2. Create a Virtual Key on LiteLLM
|
||||
|
||||
Virtual Keys are API Keys that allow you to authenticate to LiteLLM Proxy. We will create a Virtual Key that will allow OpenWebUI to access LiteLLM.
|
||||
|
||||
### 2.1 LiteLLM User Management Hierarchy
|
||||
|
||||
On LiteLLM, you can create Organizations, Teams, Users and Virtual Keys. For this tutorial, we will create a Team and a Virtual Key.
|
||||
|
||||
- `Organization` - An Organization is a group of Teams. (US Engineering, EU Developer Tools)
|
||||
- `Team` - A Team is a group of Users. (OpenWeb UI Team, Data Science Team, etc.)
|
||||
- `User` - A User is an individual user (employee, developer, eg. `krrish@litellm.ai`)
|
||||
- `Virtual Key` - A Virtual Key is an API Key that allows you to authenticate to LiteLLM Proxy. A Virtual Key is associated with a User or Team.
|
||||
|
||||
Once the Team is created, you can invite Users to the Team. You can read more about LiteLLM's User Management [here](https://docs.litellm.ai/docs/proxy/user_management_heirarchy).
|
||||
|
||||
### 2.2 Create a Team on LiteLLM
|
||||
|
||||
Navigate to [http://localhost:4000/ui](http://localhost:4000/ui) and create a new team.
|
||||
|
||||
<Image img={require('../../img/litellm_create_team.gif')} />
|
||||
|
||||
### 2.3 Create a Virtual Key on LiteLLM

Navigate to [http://localhost:4000/ui](http://localhost:4000/ui) and create a new Virtual Key.
|
||||
|
||||
LiteLLM allows you to specify what models are available on OpenWeb UI (by specifying the models the key will have access to).
|
||||
|
||||
<Image img={require('../../img/create_key_in_team_oweb.gif')} />
|
||||
|
||||
## 3. Connect OpenWeb UI to LiteLLM
|
||||
|
||||
On OpenWeb UI, navigate to Settings -> Connections and create a new connection to LiteLLM.
|
||||
|
||||
Enter the following details:
|
||||
- URL: `http://localhost:4000` (your litellm proxy base url)
|
||||
- Key: `your-virtual-key` (the key you created in the previous step)
|
||||
|
||||
<Image img={require('../../img/litellm_setup_openweb.gif')} />
|
||||
|
||||
### 3.1 Test Request
|
||||
|
||||
In the model selector at the top left, you should only see the models you gave the key access to in Step 2.

Once you have selected a model, enter your message content and click `Submit`.
|
||||
|
||||
<Image img={require('../../img/basic_litellm.gif')} />
|
||||
|
||||
### 3.2 Tracking Spend / Usage
|
||||
|
||||
After your request is made, navigate to `Logs` on the LiteLLM UI. You can see the Team, Key, Model, Usage and Cost for the request.
|
||||
|
||||
<!-- <Image img={require('../../img/litellm_logs_openweb.gif')} /> -->
|
||||
|
||||
|
||||
|
||||
## Render `thinking` content on OpenWeb UI
|
||||
|
||||
OpenWebUI requires reasoning/thinking content to be rendered with `<think></think>` tags. In order to render this for specific models, you can use the `merge_reasoning_content_in_choices` litellm parameter.
|
||||
|
||||
Example litellm config.yaml:
|
||||
|
||||
```yaml
|
||||
model_list:
|
||||
- model_name: thinking-anthropic-claude-3-7-sonnet
|
||||
litellm_params:
|
||||
model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0
|
||||
thinking: {"type": "enabled", "budget_tokens": 1024}
|
||||
max_tokens: 1080
|
||||
merge_reasoning_content_in_choices: true
|
||||
```
|
||||
|
||||
### Test it on OpenWeb UI
|
||||
|
||||
On the models dropdown select `thinking-anthropic-claude-3-7-sonnet`
|
||||
|
||||
<Image img={require('../../img/litellm_thinking_openweb.gif')} />
|
||||
|
||||
|
||||
|
||||
|
|
@ -44,7 +44,7 @@ const config = {
|
|||
path: './release_notes',
|
||||
routeBasePath: 'release_notes',
|
||||
blogTitle: 'Release Notes',
|
||||
blogSidebarTitle: 'All Releases',
|
||||
blogSidebarTitle: 'Releases',
|
||||
blogSidebarCount: 'ALL',
|
||||
postsPerPage: 'ALL',
|
||||
showReadingTime: false,
|
||||
|
|
BIN docs/my-website/img/basic_litellm.gif (new file, 2.6 MiB)
BIN docs/my-website/img/create_key_in_team_oweb.gif (new file, 13 MiB)
BIN docs/my-website/img/litellm_create_team.gif (new file, 5.4 MiB)
BIN docs/my-website/img/litellm_setup_openweb.gif (new file, 2.7 MiB)
BIN docs/my-website/img/litellm_thinking_openweb.gif (new file, 5.1 MiB)
BIN docs/my-website/img/release_notes/anthropic_thinking.jpg (new file, 470 KiB)
BIN docs/my-website/img/release_notes/error_logs.jpg (new file, 918 KiB)
BIN docs/my-website/img/release_notes/v1632_release.jpg (new file, 386 KiB)
|
@ -20,12 +20,6 @@ import Image from '@theme/IdealImage';
|
|||
# v1.61.20-stable
|
||||
|
||||
|
||||
:::info
|
||||
|
||||
`v1.61.20-stable` will be live on 2025-02-04.
|
||||
|
||||
:::
|
||||
|
||||
These are the changes since `v1.61.13-stable`.
|
||||
|
||||
This release is primarily focused on:
|
||||
|
|
40
docs/my-website/release_notes/v1.63.0/index.md
Normal file
|
@ -0,0 +1,40 @@
|
|||
---
|
||||
title: v1.63.0 - Anthropic 'thinking' response update
|
||||
slug: v1.63.0
|
||||
date: 2025-03-05T10:00:00
|
||||
authors:
|
||||
- name: Krrish Dholakia
|
||||
title: CEO, LiteLLM
|
||||
url: https://www.linkedin.com/in/krish-d/
|
||||
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
||||
- name: Ishaan Jaffer
|
||||
title: CTO, LiteLLM
|
||||
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGiM7ZrUwqu_Q/profile-displayphoto-shrink_800_800/profile-displayphoto-shrink_800_800/0/1675971026692?e=1741824000&v=beta&t=eQnRdXPJo4eiINWTZARoYTfqh064pgZ-E21pQTSy8jc
|
||||
tags: [llm translation, thinking, reasoning_content, claude-3-7-sonnet]
|
||||
hide_table_of_contents: false
|
||||
---
|
||||
|
||||
v1.63.0 fixes Anthropic 'thinking' response on streaming to return the `signature` block. [Github Issue](https://github.com/BerriAI/litellm/issues/8964)
|
||||
|
||||
|
||||
|
||||
It also moves the response structure from `signature_delta` to `signature` to be the same as Anthropic. [Anthropic Docs](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#implementing-extended-thinking)
|
||||
|
||||
|
||||
## Diff
|
||||
|
||||
```bash
|
||||
"message": {
|
||||
...
|
||||
"reasoning_content": "The capital of France is Paris.",
|
||||
"thinking_blocks": [
|
||||
{
|
||||
"type": "thinking",
|
||||
"thinking": "The capital of France is Paris.",
|
||||
- "signature_delta": "EqoBCkgIARABGAIiQL2UoU0b1OHYi+..." # 👈 OLD FORMAT
|
||||
+ "signature": "EqoBCkgIARABGAIiQL2UoU0b1OHYi+..." # 👈 KEY CHANGE
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
112
docs/my-website/release_notes/v1.63.2-stable/index.md
Normal file
|
@ -0,0 +1,112 @@
|
|||
---
|
||||
title: v1.63.2-stable
|
||||
slug: v1.63.2-stable
|
||||
date: 2025-03-08T10:00:00
|
||||
authors:
|
||||
- name: Krrish Dholakia
|
||||
title: CEO, LiteLLM
|
||||
url: https://www.linkedin.com/in/krish-d/
|
||||
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
||||
- name: Ishaan Jaffer
|
||||
title: CTO, LiteLLM
|
||||
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGiM7ZrUwqu_Q/profile-displayphoto-shrink_800_800/profile-displayphoto-shrink_800_800/0/1675971026692?e=1741824000&v=beta&t=eQnRdXPJo4eiINWTZARoYTfqh064pgZ-E21pQTSy8jc
|
||||
tags: [llm translation, thinking, reasoning_content, claude-3-7-sonnet]
|
||||
hide_table_of_contents: false
|
||||
---
|
||||
|
||||
import Image from '@theme/IdealImage';
|
||||
|
||||
|
||||
These are the changes since `v1.61.20-stable`.
|
||||
|
||||
This release is primarily focused on:
|
||||
- LLM Translation improvements (more `thinking` content improvements)
|
||||
- UI improvements (Error logs now shown on UI)
|
||||
|
||||
|
||||
:::info
|
||||
|
||||
This release will be live on 03/09/2025
|
||||
|
||||
:::
|
||||
|
||||
<Image img={require('../../img/release_notes/v1632_release.jpg')} />
|
||||
|
||||
|
||||
## Demo Instance
|
||||
|
||||
Here's a Demo Instance to test changes:
|
||||
- Instance: https://demo.litellm.ai/
|
||||
- Login Credentials:
|
||||
- Username: admin
|
||||
- Password: sk-1234
|
||||
|
||||
|
||||
## New Models / Updated Models
|
||||
|
||||
1. Add `supports_pdf_input` for specific Bedrock Claude models [PR](https://github.com/BerriAI/litellm/commit/f63cf0030679fe1a43d03fb196e815a0f28dae92)
|
||||
2. Add pricing for amazon `eu` models [PR](https://github.com/BerriAI/litellm/commits/main/model_prices_and_context_window.json)
|
||||
3. Fix Azure O1 mini pricing [PR](https://github.com/BerriAI/litellm/commit/52de1949ef2f76b8572df751f9c868a016d4832c)
|
||||
|
||||
## LLM Translation
|
||||
|
||||
<Image img={require('../../img/release_notes/anthropic_thinking.jpg')}/>
|
||||
|
||||
1. Support `/openai/` passthrough for Assistant endpoints. [Get Started](https://docs.litellm.ai/docs/pass_through/openai_passthrough)
|
||||
2. Bedrock Claude - fix tool calling transformation on invoke route. [Get Started](../../docs/providers/bedrock#usage---function-calling--tool-calling)
|
||||
3. Bedrock Claude - response_format support for claude on invoke route. [Get Started](../../docs/providers/bedrock#usage---structured-output--json-mode)
|
||||
4. Bedrock - pass `description` if set in response_format. [Get Started](../../docs/providers/bedrock#usage---structured-output--json-mode)
|
||||
5. Bedrock - Fix passing response_format: {"type": "text"}. [PR](https://github.com/BerriAI/litellm/commit/c84b489d5897755139aa7d4e9e54727ebe0fa540)
|
||||
6. OpenAI - Handle sending image_url as str to openai. [Get Started](https://docs.litellm.ai/docs/completion/vision)
|
||||
7. Deepseek - return 'reasoning_content' missing on streaming. [Get Started](https://docs.litellm.ai/docs/reasoning_content)
|
||||
8. Caching - Support caching on reasoning content. [Get Started](https://docs.litellm.ai/docs/proxy/caching)
|
||||
9. Bedrock - handle thinking blocks in assistant message. [Get Started](https://docs.litellm.ai/docs/providers/bedrock#usage---thinking--reasoning-content)
|
||||
10. Anthropic - Return `signature` on streaming. [Get Started](https://docs.litellm.ai/docs/providers/bedrock#usage---thinking--reasoning-content)
|
||||
- Note: We've also migrated from `signature_delta` to `signature`. [Read more](https://docs.litellm.ai/release_notes/v1.63.0)
|
||||
11. Support format param for specifying image type. [Get Started](../../docs/completion/vision.md#explicitly-specify-image-type)
|
||||
12. Anthropic - `/v1/messages` endpoint - `thinking` param support. [Get Started](../../docs/anthropic_unified.md)
|
||||
- Note: this refactors the [BETA] unified `/v1/messages` endpoint, to just work for the Anthropic API.
|
||||
13. Vertex AI - handle $id in response schema when calling vertex ai. [Get Started](https://docs.litellm.ai/docs/providers/vertex#json-schema)
|
||||
|
||||
## Spend Tracking Improvements
|
||||
|
||||
1. Batches API - Fix cost calculation to run on retrieve_batch. [Get Started](https://docs.litellm.ai/docs/batches)
|
||||
2. Batches API - Log batch models in spend logs / standard logging payload. [Get Started](../../docs/proxy/logging_spec.md#standardlogginghiddenparams)
|
||||
|
||||
## Management Endpoints / UI
|
||||
|
||||
<Image img={require('../../img/release_notes/error_logs.jpg')} />
|
||||
|
||||
1. Virtual Keys Page
|
||||
- Allow team/org filters to be searchable on the Create Key Page
|
||||
- Add created_by and updated_by fields to Keys table
|
||||
- Show 'user_email' on key table
|
||||
- Show 100 Keys Per Page, Use full height, increase width of key alias
|
||||
2. Logs Page
|
||||
- Show Error Logs on LiteLLM UI
|
||||
- Allow Internal Users to View their own logs
|
||||
3. Internal Users Page
|
||||
- Allow admin to control default model access for internal users
|
||||
4. Fix session handling with cookies
|
||||
|
||||
## Logging / Guardrail Integrations
|
||||
|
||||
1. Fix prometheus metrics w/ custom metrics, when keys containing team_id make requests. [PR](https://github.com/BerriAI/litellm/pull/8935)
|
||||
|
||||
## Performance / Loadbalancing / Reliability improvements
|
||||
|
||||
1. Cooldowns - Support cooldowns on models called with client side credentials. [Get Started](https://docs.litellm.ai/docs/proxy/clientside_auth#pass-user-llm-api-keys--api-base)
|
||||
2. Tag-based Routing - ensures tag-based routing across all endpoints (`/embeddings`, `/image_generation`, etc.). [Get Started](https://docs.litellm.ai/docs/proxy/tag_routing)
|
||||
|
||||
## General Proxy Improvements
|
||||
|
||||
1. Raise BadRequestError when unknown model passed in request
|
||||
2. Enforce model access restrictions on Azure OpenAI proxy route
|
||||
3. Reliability fix - Handle emojis in text - fix orjson error
|
||||
4. Model Access Patch - don't overwrite litellm.anthropic_models when running auth checks
|
||||
5. Enable setting timezone information in docker image
|
||||
|
||||
## Complete Git Diff
|
||||
|
||||
[Here's the complete git diff](https://github.com/BerriAI/litellm/compare/v1.61.20-stable...v1.63.2-stable)
|
|
@ -46,6 +46,7 @@ const sidebars = {
|
|||
"proxy/health",
|
||||
"proxy/debugging",
|
||||
"proxy/spending_monitoring",
|
||||
"proxy/master_key_rotations",
|
||||
],
|
||||
},
|
||||
"proxy/demo",
|
||||
|
@ -256,13 +257,19 @@ const sidebars = {
|
|||
"completion/batching",
|
||||
"completion/mock_requests",
|
||||
"completion/reliable_completions",
|
||||
'tutorials/litellm_proxy_aporia',
|
||||
|
||||
]
|
||||
},
|
||||
{
|
||||
type: "category",
|
||||
label: "Supported Endpoints",
|
||||
link: {
|
||||
type: "generated-index",
|
||||
title: "Supported Endpoints",
|
||||
description:
|
||||
"Learn how to deploy + call models from different providers on LiteLLM",
|
||||
slug: "/supported_endpoints",
|
||||
},
|
||||
items: [
|
||||
{
|
||||
type: "category",
|
||||
|
@ -281,6 +288,7 @@ const sidebars = {
|
|||
},
|
||||
"text_completion",
|
||||
"embedding/supported_embedding",
|
||||
"anthropic_unified",
|
||||
{
|
||||
type: "category",
|
||||
label: "Image",
|
||||
|
@ -350,23 +358,6 @@ const sidebars = {
|
|||
label: "LangChain, LlamaIndex, Instructor Integration",
|
||||
items: ["langchain/langchain", "tutorials/instructor"],
|
||||
},
|
||||
{
|
||||
type: "category",
|
||||
label: "Tutorials",
|
||||
items: [
|
||||
|
||||
'tutorials/azure_openai',
|
||||
'tutorials/instructor',
|
||||
"tutorials/gradio_integration",
|
||||
"tutorials/huggingface_codellama",
|
||||
"tutorials/huggingface_tutorial",
|
||||
"tutorials/TogetherAI_liteLLM",
|
||||
"tutorials/finetuned_chat_gpt",
|
||||
"tutorials/text_completion",
|
||||
"tutorials/first_playground",
|
||||
"tutorials/model_fallbacks",
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -383,13 +374,6 @@ const sidebars = {
|
|||
"load_test_rpm",
|
||||
]
|
||||
},
|
||||
{
|
||||
type: "category",
|
||||
label: "Adding Providers",
|
||||
items: [
|
||||
"adding_provider/directory_structure",
|
||||
"adding_provider/new_rerank_provider"],
|
||||
},
|
||||
{
|
||||
type: "category",
|
||||
label: "Logging & Observability",
|
||||
|
@ -424,12 +408,50 @@ const sidebars = {
|
|||
"observability/opik_integration",
|
||||
],
|
||||
},
|
||||
{
|
||||
type: "category",
|
||||
label: "Tutorials",
|
||||
items: [
|
||||
"tutorials/openweb_ui",
|
||||
'tutorials/litellm_proxy_aporia',
|
||||
{
|
||||
type: "category",
|
||||
label: "LiteLLM Python SDK Tutorials",
|
||||
items: [
|
||||
|
||||
'tutorials/azure_openai',
|
||||
'tutorials/instructor',
|
||||
"tutorials/gradio_integration",
|
||||
"tutorials/huggingface_codellama",
|
||||
"tutorials/huggingface_tutorial",
|
||||
"tutorials/TogetherAI_liteLLM",
|
||||
"tutorials/finetuned_chat_gpt",
|
||||
"tutorials/text_completion",
|
||||
"tutorials/first_playground",
|
||||
"tutorials/model_fallbacks",
|
||||
],
|
||||
},
|
||||
]
|
||||
},
|
||||
{
|
||||
type: "category",
|
||||
label: "Contributing",
|
||||
items: [
|
||||
{
|
||||
type: "category",
|
||||
label: "Adding Providers",
|
||||
items: [
|
||||
"adding_provider/directory_structure",
|
||||
"adding_provider/new_rerank_provider"],
|
||||
},
|
||||
"extras/contributing",
|
||||
"contributing",
|
||||
]
|
||||
},
|
||||
{
|
||||
type: "category",
|
||||
label: "Extras",
|
||||
items: [
|
||||
"extras/contributing",
|
||||
"data_security",
|
||||
"data_retention",
|
||||
"migration_policy",
|
||||
|
@ -446,6 +468,7 @@ const sidebars = {
|
|||
items: [
|
||||
"projects/smolagents",
|
||||
"projects/Docq.AI",
|
||||
"projects/PDL",
|
||||
"projects/OpenInterpreter",
|
||||
"projects/Elroy",
|
||||
"projects/dbally",
|
||||
|
@ -461,9 +484,9 @@ const sidebars = {
|
|||
"projects/YiVal",
|
||||
"projects/LiteLLM Proxy",
|
||||
"projects/llm_cord",
|
||||
"projects/pgai",
|
||||
],
|
||||
},
|
||||
"contributing",
|
||||
"proxy/pii_masking",
|
||||
"extras/code_quality",
|
||||
"rules",
|
||||
|
|
|
@ -277,8 +277,6 @@ disable_end_user_cost_tracking_prometheus_only: Optional[bool] = None
|
|||
custom_prometheus_metadata_labels: List[str] = []
|
||||
#### REQUEST PRIORITIZATION ####
|
||||
priority_reservation: Optional[Dict[str, float]] = None
|
||||
|
||||
|
||||
force_ipv4: bool = (
|
||||
False # when True, litellm will force ipv4 for all LLM requests. Some users have seen httpx ConnectionError when using ipv6.
|
||||
)
|
||||
|
@ -800,9 +798,6 @@ from .llms.oobabooga.chat.transformation import OobaboogaConfig
|
|||
from .llms.maritalk import MaritalkConfig
|
||||
from .llms.openrouter.chat.transformation import OpenrouterConfig
|
||||
from .llms.anthropic.chat.transformation import AnthropicConfig
|
||||
from .llms.anthropic.experimental_pass_through.transformation import (
|
||||
AnthropicExperimentalPassThroughConfig,
|
||||
)
|
||||
from .llms.groq.stt.transformation import GroqSTTConfig
|
||||
from .llms.anthropic.completion.transformation import AnthropicTextConfig
|
||||
from .llms.triton.completion.transformation import TritonConfig
|
||||
|
@ -821,6 +816,9 @@ from .llms.infinity.rerank.transformation import InfinityRerankConfig
|
|||
from .llms.jina_ai.rerank.transformation import JinaAIRerankConfig
|
||||
from .llms.clarifai.chat.transformation import ClarifaiConfig
|
||||
from .llms.ai21.chat.transformation import AI21ChatConfig, AI21ChatConfig as AI21Config
|
||||
from .llms.anthropic.experimental_pass_through.messages.transformation import (
|
||||
AnthropicMessagesConfig,
|
||||
)
|
||||
from .llms.together_ai.chat import TogetherAIConfig
|
||||
from .llms.together_ai.completion.transformation import TogetherAITextCompletionConfig
|
||||
from .llms.cloudflare.chat.transformation import CloudflareChatConfig
|
||||
|
@ -1011,6 +1009,7 @@ from .assistants.main import *
|
|||
from .batches.main import *
|
||||
from .batch_completion.main import * # type: ignore
|
||||
from .rerank_api.main import *
|
||||
from .llms.anthropic.experimental_pass_through.messages.handler import *
|
||||
from .realtime_api.main import _arealtime
|
||||
from .fine_tuning.main import *
|
||||
from .files.main import *
|
||||
|
|
|
@ -1,186 +0,0 @@
|
|||
# What is this?
|
||||
## Translates OpenAI call to Anthropic `/v1/messages` format
|
||||
import traceback
|
||||
from typing import Any, Optional
|
||||
|
||||
import litellm
|
||||
from litellm import ChatCompletionRequest, verbose_logger
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm.types.llms.anthropic import AnthropicMessagesRequest, AnthropicResponse
|
||||
from litellm.types.utils import AdapterCompletionStreamWrapper, ModelResponse
|
||||
|
||||
|
||||
class AnthropicAdapter(CustomLogger):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
|
||||
def translate_completion_input_params(
|
||||
self, kwargs
|
||||
) -> Optional[ChatCompletionRequest]:
|
||||
"""
|
||||
- translate params, where needed
|
||||
- pass rest, as is
|
||||
"""
|
||||
request_body = AnthropicMessagesRequest(**kwargs) # type: ignore
|
||||
|
||||
translated_body = litellm.AnthropicExperimentalPassThroughConfig().translate_anthropic_to_openai(
|
||||
anthropic_message_request=request_body
|
||||
)
|
||||
|
||||
return translated_body
|
||||
|
||||
def translate_completion_output_params(
|
||||
self, response: ModelResponse
|
||||
) -> Optional[AnthropicResponse]:
|
||||
|
||||
return litellm.AnthropicExperimentalPassThroughConfig().translate_openai_response_to_anthropic(
|
||||
response=response
|
||||
)
|
||||
|
||||
def translate_completion_output_params_streaming(
|
||||
self, completion_stream: Any
|
||||
) -> AdapterCompletionStreamWrapper | None:
|
||||
return AnthropicStreamWrapper(completion_stream=completion_stream)
|
||||
|
||||
|
||||
anthropic_adapter = AnthropicAdapter()
|
||||
|
||||
|
||||
class AnthropicStreamWrapper(AdapterCompletionStreamWrapper):
|
||||
"""
|
||||
- first chunk return 'message_start'
|
||||
- content block must be started and stopped
|
||||
- finish_reason must map exactly to anthropic reason, else anthropic client won't be able to parse it.
|
||||
"""
|
||||
|
||||
sent_first_chunk: bool = False
|
||||
sent_content_block_start: bool = False
|
||||
sent_content_block_finish: bool = False
|
||||
sent_last_message: bool = False
|
||||
holding_chunk: Optional[Any] = None
|
||||
|
||||
def __next__(self):
|
||||
try:
|
||||
if self.sent_first_chunk is False:
|
||||
self.sent_first_chunk = True
|
||||
return {
|
||||
"type": "message_start",
|
||||
"message": {
|
||||
"id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY",
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [],
|
||||
"model": "claude-3-5-sonnet-20240620",
|
||||
"stop_reason": None,
|
||||
"stop_sequence": None,
|
||||
"usage": {"input_tokens": 25, "output_tokens": 1},
|
||||
},
|
||||
}
|
||||
if self.sent_content_block_start is False:
|
||||
self.sent_content_block_start = True
|
||||
return {
|
||||
"type": "content_block_start",
|
||||
"index": 0,
|
||||
"content_block": {"type": "text", "text": ""},
|
||||
}
|
||||
|
||||
for chunk in self.completion_stream:
|
||||
if chunk == "None" or chunk is None:
|
||||
raise Exception
|
||||
|
||||
processed_chunk = litellm.AnthropicExperimentalPassThroughConfig().translate_streaming_openai_response_to_anthropic(
|
||||
response=chunk
|
||||
)
|
||||
if (
|
||||
processed_chunk["type"] == "message_delta"
|
||||
and self.sent_content_block_finish is False
|
||||
):
|
||||
self.holding_chunk = processed_chunk
|
||||
self.sent_content_block_finish = True
|
||||
return {
|
||||
"type": "content_block_stop",
|
||||
"index": 0,
|
||||
}
|
||||
elif self.holding_chunk is not None:
|
||||
return_chunk = self.holding_chunk
|
||||
self.holding_chunk = processed_chunk
|
||||
return return_chunk
|
||||
else:
|
||||
return processed_chunk
|
||||
if self.holding_chunk is not None:
|
||||
return_chunk = self.holding_chunk
|
||||
self.holding_chunk = None
|
||||
return return_chunk
|
||||
if self.sent_last_message is False:
|
||||
self.sent_last_message = True
|
||||
return {"type": "message_stop"}
|
||||
raise StopIteration
|
||||
except StopIteration:
|
||||
if self.sent_last_message is False:
|
||||
self.sent_last_message = True
|
||||
return {"type": "message_stop"}
|
||||
raise StopIteration
|
||||
except Exception as e:
|
||||
verbose_logger.error(
|
||||
"Anthropic Adapter - {}\n{}".format(e, traceback.format_exc())
|
||||
)
|
||||
|
||||
async def __anext__(self):
|
||||
try:
|
||||
if self.sent_first_chunk is False:
|
||||
self.sent_first_chunk = True
|
||||
return {
|
||||
"type": "message_start",
|
||||
"message": {
|
||||
"id": "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY",
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [],
|
||||
"model": "claude-3-5-sonnet-20240620",
|
||||
"stop_reason": None,
|
||||
"stop_sequence": None,
|
||||
"usage": {"input_tokens": 25, "output_tokens": 1},
|
||||
},
|
||||
}
|
||||
if self.sent_content_block_start is False:
|
||||
self.sent_content_block_start = True
|
||||
return {
|
||||
"type": "content_block_start",
|
||||
"index": 0,
|
||||
"content_block": {"type": "text", "text": ""},
|
||||
}
|
||||
async for chunk in self.completion_stream:
|
||||
if chunk == "None" or chunk is None:
|
||||
raise Exception
|
||||
processed_chunk = litellm.AnthropicExperimentalPassThroughConfig().translate_streaming_openai_response_to_anthropic(
|
||||
response=chunk
|
||||
)
|
||||
if (
|
||||
processed_chunk["type"] == "message_delta"
|
||||
and self.sent_content_block_finish is False
|
||||
):
|
||||
self.holding_chunk = processed_chunk
|
||||
self.sent_content_block_finish = True
|
||||
return {
|
||||
"type": "content_block_stop",
|
||||
"index": 0,
|
||||
}
|
||||
elif self.holding_chunk is not None:
|
||||
return_chunk = self.holding_chunk
|
||||
self.holding_chunk = processed_chunk
|
||||
return return_chunk
|
||||
else:
|
||||
return processed_chunk
|
||||
if self.holding_chunk is not None:
|
||||
return_chunk = self.holding_chunk
|
||||
self.holding_chunk = None
|
||||
return return_chunk
|
||||
if self.sent_last_message is False:
|
||||
self.sent_last_message = True
|
||||
return {"type": "message_stop"}
|
||||
raise StopIteration
|
||||
except StopIteration:
|
||||
if self.sent_last_message is False:
|
||||
self.sent_last_message = True
|
||||
return {"type": "message_stop"}
|
||||
raise StopAsyncIteration
|
|
@ -1,76 +1,16 @@
|
|||
import asyncio
|
||||
import datetime
|
||||
import json
|
||||
import threading
|
||||
from typing import Any, List, Literal, Optional
|
||||
from typing import Any, List, Literal, Tuple
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.constants import (
|
||||
BATCH_STATUS_POLL_INTERVAL_SECONDS,
|
||||
BATCH_STATUS_POLL_MAX_ATTEMPTS,
|
||||
)
|
||||
from litellm.files.main import afile_content
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||
from litellm.types.llms.openai import Batch
|
||||
from litellm.types.utils import StandardLoggingPayload, Usage
|
||||
|
||||
|
||||
async def batches_async_logging(
|
||||
batch_id: str,
|
||||
custom_llm_provider: Literal["openai", "azure", "vertex_ai"] = "openai",
|
||||
logging_obj: Optional[LiteLLMLoggingObj] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
Async Job waits for the batch to complete and then logs the completed batch usage - cost, total tokens, prompt tokens, completion tokens
|
||||
|
||||
|
||||
Polls retrieve_batch until it returns a batch with status "completed" or "failed"
|
||||
"""
|
||||
from .main import aretrieve_batch
|
||||
|
||||
verbose_logger.debug(
|
||||
".....in _batches_async_logging... polling retrieve to get batch status"
|
||||
)
|
||||
if logging_obj is None:
|
||||
raise ValueError(
|
||||
"logging_obj is None cannot calculate cost / log batch creation event"
|
||||
)
|
||||
for _ in range(BATCH_STATUS_POLL_MAX_ATTEMPTS):
|
||||
try:
|
||||
start_time = datetime.datetime.now()
|
||||
batch: Batch = await aretrieve_batch(batch_id, custom_llm_provider)
|
||||
verbose_logger.debug(
|
||||
"in _batches_async_logging... batch status= %s", batch.status
|
||||
)
|
||||
|
||||
if batch.status == "completed":
|
||||
end_time = datetime.datetime.now()
|
||||
await _handle_completed_batch(
|
||||
batch=batch,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
logging_obj=logging_obj,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
**kwargs,
|
||||
)
|
||||
break
|
||||
elif batch.status == "failed":
|
||||
pass
|
||||
except Exception as e:
|
||||
verbose_logger.error("error in batches_async_logging", e)
|
||||
await asyncio.sleep(BATCH_STATUS_POLL_INTERVAL_SECONDS)
|
||||
from litellm.types.utils import CallTypes, Usage
|
||||
|
||||
|
||||
async def _handle_completed_batch(
|
||||
batch: Batch,
|
||||
custom_llm_provider: Literal["openai", "azure", "vertex_ai"],
|
||||
logging_obj: LiteLLMLoggingObj,
|
||||
start_time: datetime.datetime,
|
||||
end_time: datetime.datetime,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
) -> Tuple[float, Usage, List[str]]:
|
||||
"""Helper function to process a completed batch and handle logging"""
|
||||
# Get batch results
|
||||
file_content_dictionary = await _get_batch_output_file_content_as_dictionary(
|
||||
|
@ -87,52 +27,25 @@ async def _handle_completed_batch(
|
|||
custom_llm_provider=custom_llm_provider,
|
||||
)
|
||||
|
||||
# Handle logging
|
||||
await _log_completed_batch(
|
||||
logging_obj=logging_obj,
|
||||
batch_usage=batch_usage,
|
||||
batch_cost=batch_cost,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
**kwargs,
|
||||
)
|
||||
batch_models = _get_batch_models_from_file_content(file_content_dictionary)
|
||||
|
||||
return batch_cost, batch_usage, batch_models
|
||||
|
||||
|
||||
async def _log_completed_batch(
|
||||
logging_obj: LiteLLMLoggingObj,
|
||||
batch_usage: Usage,
|
||||
batch_cost: float,
|
||||
start_time: datetime.datetime,
|
||||
end_time: datetime.datetime,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
"""Helper function to handle all logging operations for a completed batch"""
|
||||
logging_obj.call_type = "batch_success"
|
||||
|
||||
standard_logging_object = _create_standard_logging_object_for_completed_batch(
|
||||
kwargs=kwargs,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
logging_obj=logging_obj,
|
||||
batch_usage_object=batch_usage,
|
||||
response_cost=batch_cost,
|
||||
)
|
||||
|
||||
logging_obj.model_call_details["standard_logging_object"] = standard_logging_object
|
||||
|
||||
# Launch async and sync logging handlers
|
||||
asyncio.create_task(
|
||||
logging_obj.async_success_handler(
|
||||
result=None,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
cache_hit=None,
|
||||
)
|
||||
)
|
||||
threading.Thread(
|
||||
target=logging_obj.success_handler,
|
||||
args=(None, start_time, end_time),
|
||||
).start()
|
||||
def _get_batch_models_from_file_content(
|
||||
file_content_dictionary: List[dict],
|
||||
) -> List[str]:
|
||||
"""
|
||||
Get the models from the file content
|
||||
"""
|
||||
batch_models = []
|
||||
for _item in file_content_dictionary:
|
||||
if _batch_response_was_successful(_item):
|
||||
_response_body = _get_response_from_batch_job_output_file(_item)
|
||||
_model = _response_body.get("model")
|
||||
if _model:
|
||||
batch_models.append(_model)
|
||||
return batch_models
|
||||
|
||||
|
||||
async def _batch_cost_calculator(
|
||||
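For orientation, the model-extraction step above assumes each line of the batch output file follows the OpenAI batch output shape (a dict with a `response.status_code` and an OpenAI-style `response.body`). A minimal standalone sketch of that extraction, with made-up sample rows and a hypothetical helper name:

```python
# Hypothetical helper illustrating the idea only - not litellm's actual parser.
from typing import List


def sketch_batch_models(rows: List[dict]) -> List[str]:
    models = []
    for row in rows:
        response = row.get("response") or {}
        if response.get("status_code") == 200:  # only successful rows count
            model = (response.get("body") or {}).get("model")
            if model:
                models.append(model)
    return models


sample_rows = [
    {"custom_id": "req-1", "response": {"status_code": 200, "body": {"model": "gpt-4o-mini"}}},
    {"custom_id": "req-2", "response": {"status_code": 429, "body": {}}},
]
print(sketch_batch_models(sample_rows))  # ['gpt-4o-mini']
```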
|
@ -159,6 +72,8 @@ async def _get_batch_output_file_content_as_dictionary(
|
|||
"""
|
||||
Get the batch output file content as a list of dictionaries
|
||||
"""
|
||||
from litellm.files.main import afile_content
|
||||
|
||||
if custom_llm_provider == "vertex_ai":
|
||||
raise ValueError("Vertex AI does not support file content retrieval")
|
||||
|
||||
|
@ -208,6 +123,7 @@ def _get_batch_job_cost_from_file_content(
|
|||
total_cost += litellm.completion_cost(
|
||||
completion_response=_response_body,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
call_type=CallTypes.aretrieve_batch.value,
|
||||
)
|
||||
verbose_logger.debug("total_cost=%s", total_cost)
|
||||
return total_cost
|
||||
|
@ -264,30 +180,3 @@ def _batch_response_was_successful(batch_job_output_file: dict) -> bool:
|
|||
"""
|
||||
_response: dict = batch_job_output_file.get("response", None) or {}
|
||||
return _response.get("status_code", None) == 200
|
||||
|
||||
|
||||
def _create_standard_logging_object_for_completed_batch(
|
||||
kwargs: dict,
|
||||
start_time: datetime.datetime,
|
||||
end_time: datetime.datetime,
|
||||
logging_obj: LiteLLMLoggingObj,
|
||||
batch_usage_object: Usage,
|
||||
response_cost: float,
|
||||
) -> StandardLoggingPayload:
|
||||
"""
|
||||
Create a standard logging object for a completed batch
|
||||
"""
|
||||
standard_logging_object = logging_obj.model_call_details.get(
|
||||
"standard_logging_object", None
|
||||
)
|
||||
|
||||
if standard_logging_object is None:
|
||||
raise ValueError("unable to create standard logging object for completed batch")
|
||||
|
||||
# Add Completed Batch Job Usage and Response Cost
|
||||
standard_logging_object["call_type"] = "batch_success"
|
||||
standard_logging_object["response_cost"] = response_cost
|
||||
standard_logging_object["total_tokens"] = batch_usage_object.total_tokens
|
||||
standard_logging_object["prompt_tokens"] = batch_usage_object.prompt_tokens
|
||||
standard_logging_object["completion_tokens"] = batch_usage_object.completion_tokens
|
||||
return standard_logging_object
|
||||
|
|
|
@ -31,10 +31,9 @@ from litellm.types.llms.openai import (
|
|||
RetrieveBatchRequest,
|
||||
)
|
||||
from litellm.types.router import GenericLiteLLMParams
|
||||
from litellm.types.utils import LiteLLMBatch
|
||||
from litellm.utils import client, get_litellm_params, supports_httpx_timeout
|
||||
|
||||
from .batch_utils import batches_async_logging
|
||||
|
||||
####### ENVIRONMENT VARIABLES ###################
|
||||
openai_batches_instance = OpenAIBatchesAPI()
|
||||
azure_batches_instance = AzureBatchesAPI()
|
||||
|
@ -85,17 +84,6 @@ async def acreate_batch(
|
|||
else:
|
||||
response = init_response
|
||||
|
||||
# Start async logging job
|
||||
if response is not None:
|
||||
asyncio.create_task(
|
||||
batches_async_logging(
|
||||
logging_obj=kwargs.get("litellm_logging_obj", None),
|
||||
batch_id=response.id,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
**kwargs,
|
||||
)
|
||||
)
|
||||
|
||||
return response
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
@ -111,7 +99,7 @@ def create_batch(
|
|||
extra_headers: Optional[Dict[str, str]] = None,
|
||||
extra_body: Optional[Dict[str, str]] = None,
|
||||
**kwargs,
|
||||
) -> Union[Batch, Coroutine[Any, Any, Batch]]:
|
||||
) -> Union[LiteLLMBatch, Coroutine[Any, Any, LiteLLMBatch]]:
|
||||
"""
|
||||
Creates and executes a batch from an uploaded file of request
|
||||
|
||||
|
@ -119,21 +107,26 @@ def create_batch(
|
|||
"""
|
||||
try:
|
||||
optional_params = GenericLiteLLMParams(**kwargs)
|
||||
litellm_call_id = kwargs.get("litellm_call_id", None)
|
||||
proxy_server_request = kwargs.get("proxy_server_request", None)
|
||||
model_info = kwargs.get("model_info", None)
|
||||
_is_async = kwargs.pop("acreate_batch", False) is True
|
||||
litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj", None)
|
||||
### TIMEOUT LOGIC ###
|
||||
timeout = optional_params.timeout or kwargs.get("request_timeout", 600) or 600
|
||||
litellm_params = get_litellm_params(
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
litellm_call_id=kwargs.get("litellm_call_id", None),
|
||||
litellm_trace_id=kwargs.get("litellm_trace_id"),
|
||||
litellm_metadata=kwargs.get("litellm_metadata"),
|
||||
)
|
||||
litellm_logging_obj.update_environment_variables(
|
||||
model=None,
|
||||
user=None,
|
||||
optional_params=optional_params.model_dump(),
|
||||
litellm_params=litellm_params,
|
||||
litellm_params={
|
||||
"litellm_call_id": litellm_call_id,
|
||||
"proxy_server_request": proxy_server_request,
|
||||
"model_info": model_info,
|
||||
"metadata": metadata,
|
||||
"preset_cache_key": None,
|
||||
"stream_response": {},
|
||||
**optional_params.model_dump(exclude_unset=True),
|
||||
},
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
)
|
||||
|
||||
|
@ -261,7 +254,7 @@ def create_batch(
|
|||
response=httpx.Response(
|
||||
status_code=400,
|
||||
content="Unsupported provider",
|
||||
request=httpx.Request(method="create_thread", url="https://github.com/BerriAI/litellm"), # type: ignore
|
||||
request=httpx.Request(method="create_batch", url="https://github.com/BerriAI/litellm"), # type: ignore
|
||||
),
|
||||
)
|
||||
return response
|
||||
|
@ -269,6 +262,7 @@ def create_batch(
|
|||
raise e
|
||||
|
||||
|
||||
@client
|
||||
async def aretrieve_batch(
|
||||
batch_id: str,
|
||||
custom_llm_provider: Literal["openai", "azure", "vertex_ai"] = "openai",
|
||||
|
@ -276,7 +270,7 @@ async def aretrieve_batch(
|
|||
extra_headers: Optional[Dict[str, str]] = None,
|
||||
extra_body: Optional[Dict[str, str]] = None,
|
||||
**kwargs,
|
||||
) -> Batch:
|
||||
) -> LiteLLMBatch:
|
||||
"""
|
||||
Async: Retrieves a batch.
|
||||
|
||||
|
@ -310,6 +304,7 @@ async def aretrieve_batch(
|
|||
raise e
|
||||
|
||||
|
||||
@client
|
||||
def retrieve_batch(
|
||||
batch_id: str,
|
||||
custom_llm_provider: Literal["openai", "azure", "vertex_ai"] = "openai",
|
||||
|
@ -317,7 +312,7 @@ def retrieve_batch(
|
|||
extra_headers: Optional[Dict[str, str]] = None,
|
||||
extra_body: Optional[Dict[str, str]] = None,
|
||||
**kwargs,
|
||||
) -> Union[Batch, Coroutine[Any, Any, Batch]]:
|
||||
) -> Union[LiteLLMBatch, Coroutine[Any, Any, LiteLLMBatch]]:
|
||||
"""
|
||||
Retrieves a batch.
|
||||
|
||||
|
@ -325,9 +320,23 @@ def retrieve_batch(
|
|||
"""
|
||||
try:
|
||||
optional_params = GenericLiteLLMParams(**kwargs)
|
||||
|
||||
litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj", None)
|
||||
### TIMEOUT LOGIC ###
|
||||
timeout = optional_params.timeout or kwargs.get("request_timeout", 600) or 600
|
||||
# set timeout for 10 minutes by default
|
||||
litellm_params = get_litellm_params(
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
litellm_call_id=kwargs.get("litellm_call_id", None),
|
||||
litellm_trace_id=kwargs.get("litellm_trace_id"),
|
||||
litellm_metadata=kwargs.get("litellm_metadata"),
|
||||
)
|
||||
litellm_logging_obj.update_environment_variables(
|
||||
model=None,
|
||||
user=None,
|
||||
optional_params=optional_params.model_dump(),
|
||||
litellm_params=litellm_params,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
)
|
||||
|
||||
if (
|
||||
timeout is not None
|
||||
|
|
|
@ -247,7 +247,6 @@ class LLMCachingHandler:
|
|||
pass
|
||||
else:
|
||||
call_type = original_function.__name__
|
||||
|
||||
cached_result = self._convert_cached_result_to_model_response(
|
||||
cached_result=cached_result,
|
||||
call_type=call_type,
|
||||
|
@ -725,6 +724,7 @@ class LLMCachingHandler:
|
|||
"""
|
||||
Sync internal method to add the result to the cache
|
||||
"""
|
||||
|
||||
new_kwargs = kwargs.copy()
|
||||
new_kwargs.update(
|
||||
convert_args_to_kwargs(
|
||||
|
@ -738,6 +738,7 @@ class LLMCachingHandler:
|
|||
if self._should_store_result_in_cache(
|
||||
original_function=self.original_function, kwargs=new_kwargs
|
||||
):
|
||||
|
||||
litellm.cache.add_cache(result, **new_kwargs)
|
||||
|
||||
return
|
||||
|
|
|
@ -239,6 +239,15 @@ def cost_per_token( # noqa: PLR0915
|
|||
custom_llm_provider=custom_llm_provider,
|
||||
billed_units=rerank_billed_units,
|
||||
)
|
||||
elif (
|
||||
call_type == "aretrieve_batch"
|
||||
or call_type == "retrieve_batch"
|
||||
or call_type == CallTypes.aretrieve_batch
|
||||
or call_type == CallTypes.retrieve_batch
|
||||
):
|
||||
return batch_cost_calculator(
|
||||
usage=usage_block, model=model, custom_llm_provider=custom_llm_provider
|
||||
)
|
||||
elif call_type == "atranscription" or call_type == "transcription":
|
||||
return openai_cost_per_second(
|
||||
model=model,
|
||||
|
@ -399,9 +408,12 @@ def _select_model_name_for_cost_calc(
|
|||
if base_model is not None:
|
||||
return_model = base_model
|
||||
|
||||
completion_response_model: Optional[str] = getattr(
|
||||
completion_response, "model", None
|
||||
)
|
||||
completion_response_model: Optional[str] = None
|
||||
if completion_response is not None:
|
||||
if isinstance(completion_response, BaseModel):
|
||||
completion_response_model = getattr(completion_response, "model", None)
|
||||
elif isinstance(completion_response, dict):
|
||||
completion_response_model = completion_response.get("model", None)
|
||||
hidden_params: Optional[dict] = getattr(completion_response, "_hidden_params", None)
|
||||
if completion_response_model is None and hidden_params is not None:
|
||||
if (
|
||||
|
@ -957,3 +969,54 @@ def default_image_cost_calculator(
|
|||
)
|
||||
|
||||
return cost_info["input_cost_per_pixel"] * height * width * n
|
||||
|
||||
|
||||
def batch_cost_calculator(
|
||||
usage: Usage,
|
||||
model: str,
|
||||
custom_llm_provider: Optional[str] = None,
|
||||
) -> Tuple[float, float]:
|
||||
"""
|
||||
Calculate the cost of a batch job
|
||||
"""
|
||||
|
||||
_, custom_llm_provider, _, _ = litellm.get_llm_provider(
|
||||
model=model, custom_llm_provider=custom_llm_provider
|
||||
)
|
||||
|
||||
verbose_logger.info(
|
||||
"Calculating batch cost per token. model=%s, custom_llm_provider=%s",
|
||||
model,
|
||||
custom_llm_provider,
|
||||
)
|
||||
|
||||
try:
|
||||
model_info: Optional[ModelInfo] = litellm.get_model_info(
|
||||
model=model, custom_llm_provider=custom_llm_provider
|
||||
)
|
||||
except Exception:
|
||||
model_info = None
|
||||
|
||||
if not model_info:
|
||||
return 0.0, 0.0
|
||||
|
||||
input_cost_per_token_batches = model_info.get("input_cost_per_token_batches")
|
||||
input_cost_per_token = model_info.get("input_cost_per_token")
|
||||
output_cost_per_token_batches = model_info.get("output_cost_per_token_batches")
|
||||
output_cost_per_token = model_info.get("output_cost_per_token")
|
||||
total_prompt_cost = 0.0
|
||||
total_completion_cost = 0.0
|
||||
if input_cost_per_token_batches:
|
||||
total_prompt_cost = usage.prompt_tokens * input_cost_per_token_batches
|
||||
elif input_cost_per_token:
|
||||
total_prompt_cost = (
|
||||
usage.prompt_tokens * (input_cost_per_token) / 2
|
||||
) # batch cost is usually half of the regular token cost
|
||||
if output_cost_per_token_batches:
|
||||
total_completion_cost = usage.completion_tokens * output_cost_per_token_batches
|
||||
elif output_cost_per_token:
|
||||
total_completion_cost = (
|
||||
usage.completion_tokens * (output_cost_per_token) / 2
|
||||
) # batch cost is usually half of the regular token cost
|
||||
|
||||
return total_prompt_cost, total_completion_cost
|
||||
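The pricing fallback in `batch_cost_calculator` above can be summarized in isolation: use the `*_cost_per_token_batches` price when the model map provides one, otherwise halve the regular per-token price. A minimal sketch, with made-up prices:

```python
# Illustrative only - prices are invented, not real litellm pricing data.
from typing import Optional, Tuple


def sketch_batch_cost(
    prompt_tokens: int,
    completion_tokens: int,
    input_cost_per_token: Optional[float] = None,
    output_cost_per_token: Optional[float] = None,
    input_cost_per_token_batches: Optional[float] = None,
    output_cost_per_token_batches: Optional[float] = None,
) -> Tuple[float, float]:
    prompt_cost, completion_cost = 0.0, 0.0
    if input_cost_per_token_batches:
        prompt_cost = prompt_tokens * input_cost_per_token_batches
    elif input_cost_per_token:
        prompt_cost = prompt_tokens * input_cost_per_token / 2  # batch = half price
    if output_cost_per_token_batches:
        completion_cost = completion_tokens * output_cost_per_token_batches
    elif output_cost_per_token:
        completion_cost = completion_tokens * output_cost_per_token / 2
    return prompt_cost, completion_cost


# 1,000 prompt + 500 completion tokens at $3 / $15 per million tokens
print(sketch_batch_cost(1000, 500, 3e-6, 15e-6))  # (0.0015, 0.00375)
```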
|
|
|
@ -816,7 +816,7 @@ def file_content(
|
|||
)
|
||||
else:
|
||||
raise litellm.exceptions.BadRequestError(
|
||||
message="LiteLLM doesn't support {} for 'file_content'. Only 'openai' and 'azure' are supported.".format(
|
||||
message="LiteLLM doesn't support {} for 'custom_llm_provider'. Supported providers are 'openai', 'azure', 'vertex_ai'.".format(
|
||||
custom_llm_provider
|
||||
),
|
||||
model="n/a",
|
||||
|
|
|
@ -40,6 +40,7 @@ in_memory_dynamic_logger_cache = DynamicLoggingCache()
|
|||
def langfuse_client_init(
|
||||
langfuse_public_key=None,
|
||||
langfuse_secret=None,
|
||||
langfuse_secret_key=None,
|
||||
langfuse_host=None,
|
||||
flush_interval=1,
|
||||
) -> LangfuseClass:
|
||||
|
@ -67,7 +68,10 @@ def langfuse_client_init(
|
|||
)
|
||||
|
||||
# Instance variables
|
||||
secret_key = langfuse_secret or os.getenv("LANGFUSE_SECRET_KEY")
|
||||
|
||||
secret_key = (
|
||||
langfuse_secret or langfuse_secret_key or os.getenv("LANGFUSE_SECRET_KEY")
|
||||
)
|
||||
public_key = langfuse_public_key or os.getenv("LANGFUSE_PUBLIC_KEY")
|
||||
langfuse_host = langfuse_host or os.getenv(
|
||||
"LANGFUSE_HOST", "https://cloud.langfuse.com"
|
||||
|
@ -190,6 +194,7 @@ class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogge
|
|||
langfuse_client = langfuse_client_init(
|
||||
langfuse_public_key=dynamic_callback_params.get("langfuse_public_key"),
|
||||
langfuse_secret=dynamic_callback_params.get("langfuse_secret"),
|
||||
langfuse_secret_key=dynamic_callback_params.get("langfuse_secret_key"),
|
||||
langfuse_host=dynamic_callback_params.get("langfuse_host"),
|
||||
)
|
||||
langfuse_prompt_client = self._get_prompt_from_id(
|
||||
|
@ -206,6 +211,7 @@ class LangfusePromptManagement(LangFuseLogger, PromptManagementBase, CustomLogge
|
|||
langfuse_client = langfuse_client_init(
|
||||
langfuse_public_key=dynamic_callback_params.get("langfuse_public_key"),
|
||||
langfuse_secret=dynamic_callback_params.get("langfuse_secret"),
|
||||
langfuse_secret_key=dynamic_callback_params.get("langfuse_secret_key"),
|
||||
langfuse_host=dynamic_callback_params.get("langfuse_host"),
|
||||
)
|
||||
langfuse_prompt_client = self._get_prompt_from_id(
|
||||
|
|
|
@ -73,8 +73,19 @@ def remove_index_from_tool_calls(
|
|||
def get_litellm_metadata_from_kwargs(kwargs: dict):
|
||||
"""
|
||||
Helper to get litellm metadata from all litellm request kwargs
|
||||
|
||||
Return `litellm_metadata` if it exists, otherwise return `metadata`
|
||||
"""
|
||||
return kwargs.get("litellm_params", {}).get("metadata", {})
|
||||
litellm_params = kwargs.get("litellm_params", {})
|
||||
if litellm_params:
|
||||
metadata = litellm_params.get("metadata", {})
|
||||
litellm_metadata = litellm_params.get("litellm_metadata", {})
|
||||
if litellm_metadata:
|
||||
return litellm_metadata
|
||||
elif metadata:
|
||||
return metadata
|
||||
|
||||
return {}
|
||||
|
||||
|
||||
# Helper functions used for OTEL logging
|
||||
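The precedence the metadata helper above implements is small enough to show standalone: `litellm_metadata` wins over `metadata`, and an empty dict is returned when neither is set. A quick sketch under that assumption:

```python
# Standalone sketch of the precedence rule, for illustration.
def sketch_get_metadata(kwargs: dict) -> dict:
    litellm_params = kwargs.get("litellm_params", {}) or {}
    return (
        litellm_params.get("litellm_metadata")
        or litellm_params.get("metadata")
        or {}
    )


print(sketch_get_metadata({"litellm_params": {"metadata": {"user": "a"}}}))          # {'user': 'a'}
print(sketch_get_metadata({"litellm_params": {"litellm_metadata": {"team": "b"}}}))  # {'team': 'b'}
print(sketch_get_metadata({}))                                                       # {}
```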
|
|
|
@ -57,6 +57,7 @@ def get_litellm_params(
|
|||
prompt_variables: Optional[dict] = None,
|
||||
async_call: Optional[bool] = None,
|
||||
ssl_verify: Optional[bool] = None,
|
||||
merge_reasoning_content_in_choices: Optional[bool] = None,
|
||||
**kwargs,
|
||||
) -> dict:
|
||||
litellm_params = {
|
||||
|
@ -97,5 +98,6 @@ def get_litellm_params(
|
|||
"prompt_variables": prompt_variables,
|
||||
"async_call": async_call,
|
||||
"ssl_verify": ssl_verify,
|
||||
"merge_reasoning_content_in_choices": merge_reasoning_content_in_choices,
|
||||
}
|
||||
return litellm_params
|
||||
|
|
|
@ -25,6 +25,7 @@ from litellm import (
|
|||
turn_off_message_logging,
|
||||
)
|
||||
from litellm._logging import _is_debugging_on, verbose_logger
|
||||
from litellm.batches.batch_utils import _handle_completed_batch
|
||||
from litellm.caching.caching import DualCache, InMemoryCache
|
||||
from litellm.caching.caching_handler import LLMCachingHandler
|
||||
from litellm.cost_calculator import _select_model_name_for_cost_calc
|
||||
|
@ -50,9 +51,11 @@ from litellm.types.utils import (
|
|||
CallTypes,
|
||||
EmbeddingResponse,
|
||||
ImageResponse,
|
||||
LiteLLMBatch,
|
||||
LiteLLMLoggingBaseClass,
|
||||
ModelResponse,
|
||||
ModelResponseStream,
|
||||
RawRequestTypedDict,
|
||||
StandardCallbackDynamicParams,
|
||||
StandardLoggingAdditionalHeaders,
|
||||
StandardLoggingHiddenParams,
|
||||
|
@ -203,6 +206,7 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
] = None,
|
||||
applied_guardrails: Optional[List[str]] = None,
|
||||
kwargs: Optional[Dict] = None,
|
||||
log_raw_request_response: bool = False,
|
||||
):
|
||||
_input: Optional[str] = messages # save original value of messages
|
||||
if messages is not None:
|
||||
|
@ -231,6 +235,7 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
self.sync_streaming_chunks: List[Any] = (
|
||||
[]
|
||||
) # for generating complete stream response
|
||||
self.log_raw_request_response = log_raw_request_response
|
||||
|
||||
# Initialize dynamic callbacks
|
||||
self.dynamic_input_callbacks: Optional[
|
||||
|
@ -451,6 +456,18 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
|
||||
return model, messages, non_default_params
|
||||
|
||||
def _get_raw_request_body(self, data: Optional[Union[dict, str]]) -> dict:
|
||||
if data is None:
|
||||
return {"error": "Received empty dictionary for raw request body"}
|
||||
if isinstance(data, str):
|
||||
try:
|
||||
return json.loads(data)
|
||||
except Exception:
|
||||
return {
|
||||
"error": "Unable to parse raw request body. Got - {}".format(data)
|
||||
}
|
||||
return data
|
||||
|
||||
def _pre_call(self, input, api_key, model=None, additional_args={}):
|
||||
"""
|
||||
Common helper function across the sync + async pre-call function
|
||||
|
@ -466,6 +483,7 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
self.model_call_details["model"] = model
|
||||
|
||||
def pre_call(self, input, api_key, model=None, additional_args={}): # noqa: PLR0915
|
||||
|
||||
# Log the exact input to the LLM API
|
||||
litellm.error_logs["PRE_CALL"] = locals()
|
||||
try:
|
||||
|
@ -483,28 +501,54 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
additional_args=additional_args,
|
||||
)
|
||||
# log raw request to provider (like LangFuse) -- if opted in.
|
||||
if log_raw_request_response is True:
|
||||
if (
|
||||
self.log_raw_request_response is True
|
||||
or log_raw_request_response is True
|
||||
):
|
||||
|
||||
_litellm_params = self.model_call_details.get("litellm_params", {})
|
||||
_metadata = _litellm_params.get("metadata", {}) or {}
|
||||
try:
|
||||
# [Non-blocking Extra Debug Information in metadata]
|
||||
if (
|
||||
turn_off_message_logging is not None
|
||||
and turn_off_message_logging is True
|
||||
):
|
||||
if turn_off_message_logging is True:
|
||||
|
||||
_metadata["raw_request"] = (
|
||||
"redacted by litellm. \
|
||||
'litellm.turn_off_message_logging=True'"
|
||||
)
|
||||
else:
|
||||
|
||||
curl_command = self._get_request_curl_command(
|
||||
api_base=additional_args.get("api_base", ""),
|
||||
headers=additional_args.get("headers", {}),
|
||||
additional_args=additional_args,
|
||||
data=additional_args.get("complete_input_dict", {}),
|
||||
)
|
||||
|
||||
_metadata["raw_request"] = str(curl_command)
|
||||
# split up, so it's easier to parse in the UI
|
||||
self.model_call_details["raw_request_typed_dict"] = (
|
||||
RawRequestTypedDict(
|
||||
raw_request_api_base=str(
|
||||
additional_args.get("api_base") or ""
|
||||
),
|
||||
raw_request_body=self._get_raw_request_body(
|
||||
additional_args.get("complete_input_dict", {})
|
||||
),
|
||||
raw_request_headers=self._get_masked_headers(
|
||||
additional_args.get("headers", {}) or {},
|
||||
ignore_sensitive_headers=True,
|
||||
),
|
||||
error=None,
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
self.model_call_details["raw_request_typed_dict"] = (
|
||||
RawRequestTypedDict(
|
||||
error=str(e),
|
||||
)
|
||||
)
|
||||
traceback.print_exc()
|
||||
_metadata["raw_request"] = (
|
||||
"Unable to Log \
|
||||
raw request: {}".format(
|
||||
|
@ -637,9 +681,14 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
)
|
||||
verbose_logger.debug(f"\033[92m{curl_command}\033[0m\n")
|
||||
|
||||
def _get_request_body(self, data: dict) -> str:
|
||||
return str(data)
|
||||
|
||||
def _get_request_curl_command(
|
||||
self, api_base: str, headers: dict, additional_args: dict, data: dict
|
||||
self, api_base: str, headers: Optional[dict], additional_args: dict, data: dict
|
||||
) -> str:
|
||||
if headers is None:
|
||||
headers = {}
|
||||
curl_command = "\n\nPOST Request Sent from LiteLLM:\n"
|
||||
curl_command += "curl -X POST \\\n"
|
||||
curl_command += f"{api_base} \\\n"
|
||||
|
@ -647,11 +696,10 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
formatted_headers = " ".join(
|
||||
[f"-H '{k}: {v}'" for k, v in masked_headers.items()]
|
||||
)
|
||||
|
||||
curl_command += (
|
||||
f"{formatted_headers} \\\n" if formatted_headers.strip() != "" else ""
|
||||
)
|
||||
curl_command += f"-d '{str(data)}'\n"
|
||||
curl_command += f"-d '{self._get_request_body(data)}'\n"
|
||||
if additional_args.get("request_str", None) is not None:
|
||||
# print the sagemaker / bedrock client request
|
||||
curl_command = "\nRequest Sent from LiteLLM:\n"
|
||||
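The raw-request debug path above renders the outgoing provider call as a curl command. A simplified sketch of that rendering (no header masking, no provider-specific `request_str` handling; the endpoint and payload below are illustrative):

```python
import json


def sketch_curl_command(api_base: str, headers: dict, data: dict) -> str:
    # Build "curl -X POST <url> -H ... -d ..." with one part per line, like the debug log.
    parts = ["curl -X POST", api_base]
    parts += [f"-H '{k}: {v}'" for k, v in (headers or {}).items()]
    parts.append(f"-d '{json.dumps(data)}'")
    return " \\\n".join(parts)


print(sketch_curl_command(
    "https://api.anthropic.com/v1/messages",
    {"anthropic-version": "2023-06-01"},
    {"model": "claude-3-haiku-20240307", "max_tokens": 100},
))
```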
|
@ -660,12 +708,20 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
curl_command = str(self.model_call_details)
|
||||
return curl_command
|
||||
|
||||
def _get_masked_headers(self, headers: dict):
|
||||
def _get_masked_headers(
|
||||
self, headers: dict, ignore_sensitive_headers: bool = False
|
||||
) -> dict:
|
||||
"""
|
||||
Internal debugging helper function
|
||||
|
||||
Masks the headers of the request sent from LiteLLM
|
||||
"""
|
||||
sensitive_keywords = [
|
||||
"authorization",
|
||||
"token",
|
||||
"key",
|
||||
"secret",
|
||||
]
|
||||
return {
|
||||
k: (
|
||||
(v[:-44] + "*" * 44)
|
||||
|
@ -673,6 +729,11 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
else "*****"
|
||||
)
|
||||
for k, v in headers.items()
|
||||
if not ignore_sensitive_headers
|
||||
or not any(
|
||||
sensitive_keyword in k.lower()
|
||||
for sensitive_keyword in sensitive_keywords
|
||||
)
|
||||
}
|
||||
|
||||
def post_call(
|
||||
|
@ -871,6 +932,24 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
|
||||
return None
|
||||
|
||||
async def _response_cost_calculator_async(
|
||||
self,
|
||||
result: Union[
|
||||
ModelResponse,
|
||||
ModelResponseStream,
|
||||
EmbeddingResponse,
|
||||
ImageResponse,
|
||||
TranscriptionResponse,
|
||||
TextCompletionResponse,
|
||||
HttpxBinaryResponseContent,
|
||||
RerankResponse,
|
||||
Batch,
|
||||
FineTuningJob,
|
||||
],
|
||||
cache_hit: Optional[bool] = None,
|
||||
) -> Optional[float]:
|
||||
return self._response_cost_calculator(result=result, cache_hit=cache_hit)
|
||||
|
||||
def should_run_callback(
|
||||
self, callback: litellm.CALLBACK_TYPES, litellm_params: dict, event_hook: str
|
||||
) -> bool:
|
||||
|
@ -912,6 +991,9 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
self.model_call_details["log_event_type"] = "successful_api_call"
|
||||
self.model_call_details["end_time"] = end_time
|
||||
self.model_call_details["cache_hit"] = cache_hit
|
||||
|
||||
if self.call_type == CallTypes.anthropic_messages.value:
|
||||
result = self._handle_anthropic_messages_response_logging(result=result)
|
||||
## if model in model cost map - log the response cost
|
||||
## else set cost to None
|
||||
if (
|
||||
|
@ -928,8 +1010,8 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
or isinstance(result, TextCompletionResponse)
|
||||
or isinstance(result, HttpxBinaryResponseContent) # tts
|
||||
or isinstance(result, RerankResponse)
|
||||
or isinstance(result, Batch)
|
||||
or isinstance(result, FineTuningJob)
|
||||
or isinstance(result, LiteLLMBatch)
|
||||
):
|
||||
## HIDDEN PARAMS ##
|
||||
hidden_params = getattr(result, "_hidden_params", {})
|
||||
|
@ -1525,6 +1607,20 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
print_verbose(
|
||||
"Logging Details LiteLLM-Async Success Call, cache_hit={}".format(cache_hit)
|
||||
)
|
||||
|
||||
## CALCULATE COST FOR BATCH JOBS
|
||||
if self.call_type == CallTypes.aretrieve_batch.value and isinstance(
|
||||
result, LiteLLMBatch
|
||||
):
|
||||
|
||||
response_cost, batch_usage, batch_models = await _handle_completed_batch(
|
||||
batch=result, custom_llm_provider=self.custom_llm_provider
|
||||
)
|
||||
|
||||
result._hidden_params["response_cost"] = response_cost
|
||||
result._hidden_params["batch_models"] = batch_models
|
||||
result.usage = batch_usage
|
||||
|
||||
start_time, end_time, result = self._success_handler_helper_fn(
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
|
@ -1532,6 +1628,7 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
cache_hit=cache_hit,
|
||||
standard_logging_object=kwargs.get("standard_logging_object", None),
|
||||
)
|
||||
|
||||
## BUILD COMPLETE STREAMED RESPONSE
|
||||
if "async_complete_streaming_response" in self.model_call_details:
|
||||
return # break out of this.
|
||||
|
@ -2270,6 +2367,37 @@ class Logging(LiteLLMLoggingBaseClass):
|
|||
return complete_streaming_response
|
||||
return None
|
||||
|
||||
def _handle_anthropic_messages_response_logging(self, result: Any) -> ModelResponse:
|
||||
"""
|
||||
Handles logging for Anthropic messages responses.
|
||||
|
||||
Args:
|
||||
result: The response object from the model call
|
||||
|
||||
Returns:
|
||||
The response object from the model call
|
||||
|
||||
- For non-streaming responses, we need to transform the response into a ModelResponse object.
|
||||
- For streaming responses, the anthropic_messages handler calls success_handler with an assembled ModelResponse.
|
||||
"""
|
||||
if self.stream and isinstance(result, ModelResponse):
|
||||
return result
|
||||
|
||||
result = litellm.AnthropicConfig().transform_response(
|
||||
raw_response=self.model_call_details["httpx_response"],
|
||||
model_response=litellm.ModelResponse(),
|
||||
model=self.model,
|
||||
messages=[],
|
||||
logging_obj=self,
|
||||
optional_params={},
|
||||
api_key="",
|
||||
request_data={},
|
||||
encoding=litellm.encoding,
|
||||
json_mode=False,
|
||||
litellm_params={},
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
def set_callbacks(callback_list, function_id=None): # noqa: PLR0915
|
||||
"""
|
||||
|
@ -3086,6 +3214,7 @@ class StandardLoggingPayloadSetup:
|
|||
response_cost=None,
|
||||
additional_headers=None,
|
||||
litellm_overhead_time_ms=None,
|
||||
batch_models=None,
|
||||
)
|
||||
if hidden_params is not None:
|
||||
for key in StandardLoggingHiddenParams.__annotations__.keys():
|
||||
|
@ -3199,6 +3328,7 @@ def get_standard_logging_object_payload(
|
|||
api_base=None,
|
||||
response_cost=None,
|
||||
litellm_overhead_time_ms=None,
|
||||
batch_models=None,
|
||||
)
|
||||
)
|
||||
|
||||
|
@ -3483,6 +3613,7 @@ def create_dummy_standard_logging_payload() -> StandardLoggingPayload:
|
|||
response_cost=None,
|
||||
additional_headers=None,
|
||||
litellm_overhead_time_ms=None,
|
||||
batch_models=None,
|
||||
)
|
||||
|
||||
# Convert numeric values to appropriate types
|
||||
|
|
|
@ -9,6 +9,7 @@ from typing import Dict, Iterable, List, Literal, Optional, Tuple, Union
|
|||
import litellm
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.constants import RESPONSE_FORMAT_TOOL_NAME
|
||||
from litellm.types.llms.openai import ChatCompletionThinkingBlock
|
||||
from litellm.types.utils import (
|
||||
ChatCompletionDeltaToolCall,
|
||||
ChatCompletionMessageToolCall,
|
||||
|
@ -128,12 +129,7 @@ def convert_to_streaming_response(response_object: Optional[dict] = None):
|
|||
model_response_object = ModelResponse(stream=True)
|
||||
choice_list = []
|
||||
for idx, choice in enumerate(response_object["choices"]):
|
||||
delta = Delta(
|
||||
content=choice["message"].get("content", None),
|
||||
role=choice["message"]["role"],
|
||||
function_call=choice["message"].get("function_call", None),
|
||||
tool_calls=choice["message"].get("tool_calls", None),
|
||||
)
|
||||
delta = Delta(**choice["message"])
|
||||
finish_reason = choice.get("finish_reason", None)
|
||||
if finish_reason is None:
|
||||
# gpt-4 vision can return 'finish_reason' or 'finish_details'
|
||||
|
@ -243,6 +239,24 @@ def _parse_content_for_reasoning(
|
|||
return None, message_text
|
||||
|
||||
|
||||
def _extract_reasoning_content(message: dict) -> Tuple[Optional[str], Optional[str]]:
|
||||
"""
|
||||
Extract reasoning content and main content from a message.
|
||||
|
||||
Args:
|
||||
message (dict): The message dictionary that may contain reasoning_content
|
||||
|
||||
Returns:
|
||||
tuple[Optional[str], Optional[str]]: A tuple of (reasoning_content, content)
|
||||
"""
|
||||
if "reasoning_content" in message:
|
||||
return message["reasoning_content"], message["content"]
|
||||
elif "reasoning" in message:
|
||||
return message["reasoning"], message["content"]
|
||||
else:
|
||||
return _parse_content_for_reasoning(message.get("content"))
|
||||
|
||||
|
||||
class LiteLLMResponseObjectHandler:
|
||||
|
||||
@staticmethod
|
||||
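The extraction order in `_extract_reasoning_content` above is: an explicit `reasoning_content` key wins, then `reasoning`, and only then is the `content` text parsed for inline reasoning. A hedged sketch, assuming the inline parse looks for `<think>...</think>` markup (the actual `_parse_content_for_reasoning` is not shown in this diff):

```python
# Illustrative sketch; the <think>-tag parsing branch is an assumption.
import re
from typing import Optional, Tuple


def sketch_extract_reasoning(message: dict) -> Tuple[Optional[str], Optional[str]]:
    if "reasoning_content" in message:
        return message["reasoning_content"], message["content"]
    if "reasoning" in message:
        return message["reasoning"], message["content"]
    content = message.get("content") or ""
    match = re.match(r"<think>(.*?)</think>(.*)", content, re.DOTALL)
    if match:
        return match.group(1), match.group(2)
    return None, content or None


print(sketch_extract_reasoning({"reasoning": "because...", "content": "42"}))
print(sketch_extract_reasoning({"content": "<think>because...</think>42"}))
```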
|
@ -456,11 +470,16 @@ def convert_to_model_response_object( # noqa: PLR0915
|
|||
provider_specific_fields[field] = choice["message"][field]
|
||||
|
||||
# Handle reasoning models that display `reasoning_content` within `content`
|
||||
|
||||
reasoning_content, content = _parse_content_for_reasoning(
|
||||
choice["message"].get("content")
|
||||
reasoning_content, content = _extract_reasoning_content(
|
||||
choice["message"]
|
||||
)
|
||||
|
||||
# Handle thinking models that display `thinking_blocks` within `content`
|
||||
thinking_blocks: Optional[List[ChatCompletionThinkingBlock]] = None
|
||||
if "thinking_blocks" in choice["message"]:
|
||||
thinking_blocks = choice["message"]["thinking_blocks"]
|
||||
provider_specific_fields["thinking_blocks"] = thinking_blocks
|
||||
|
||||
if reasoning_content:
|
||||
provider_specific_fields["reasoning_content"] = (
|
||||
reasoning_content
|
||||
|
@ -474,6 +493,7 @@ def convert_to_model_response_object( # noqa: PLR0915
|
|||
audio=choice["message"].get("audio", None),
|
||||
provider_specific_fields=provider_specific_fields,
|
||||
reasoning_content=reasoning_content,
|
||||
thinking_blocks=thinking_blocks,
|
||||
)
|
||||
finish_reason = choice.get("finish_reason", None)
|
||||
if finish_reason is None:
|
||||
|
|
|
@ -187,53 +187,125 @@ def ollama_pt(
|
|||
final_prompt_value="### Response:",
|
||||
messages=messages,
|
||||
)
|
||||
elif "llava" in model:
|
||||
prompt = ""
|
||||
images = []
|
||||
for message in messages:
|
||||
if isinstance(message["content"], str):
|
||||
prompt += message["content"]
|
||||
elif isinstance(message["content"], list):
|
||||
# see https://docs.litellm.ai/docs/providers/openai#openai-vision-models
|
||||
for element in message["content"]:
|
||||
if isinstance(element, dict):
|
||||
if element["type"] == "text":
|
||||
prompt += element["text"]
|
||||
elif element["type"] == "image_url":
|
||||
base64_image = convert_to_ollama_image(
|
||||
element["image_url"]["url"]
|
||||
)
|
||||
images.append(base64_image)
|
||||
return {"prompt": prompt, "images": images}
|
||||
else:
|
||||
user_message_types = {"user", "tool", "function"}
|
||||
msg_i = 0
|
||||
images = []
|
||||
prompt = ""
|
||||
for message in messages:
|
||||
role = message["role"]
|
||||
content = message.get("content", "")
|
||||
while msg_i < len(messages):
|
||||
init_msg_i = msg_i
|
||||
user_content_str = ""
|
||||
## MERGE CONSECUTIVE USER CONTENT ##
|
||||
while (
|
||||
msg_i < len(messages) and messages[msg_i]["role"] in user_message_types
|
||||
):
|
||||
msg_content = messages[msg_i].get("content")
|
||||
if msg_content:
|
||||
if isinstance(msg_content, list):
|
||||
for m in msg_content:
|
||||
if m.get("type", "") == "image_url":
|
||||
if isinstance(m["image_url"], str):
|
||||
images.append(m["image_url"])
|
||||
elif isinstance(m["image_url"], dict):
|
||||
images.append(m["image_url"]["url"])
|
||||
elif m.get("type", "") == "text":
|
||||
user_content_str += m["text"]
|
||||
else:
|
||||
# Tool message content will always be a string
|
||||
user_content_str += msg_content
|
||||
|
||||
if "tool_calls" in message:
|
||||
tool_calls = []
|
||||
msg_i += 1
|
||||
|
||||
for call in message["tool_calls"]:
|
||||
if user_content_str:
|
||||
prompt += f"### User:\n{user_content_str}\n\n"
|
||||
|
||||
assistant_content_str = ""
|
||||
## MERGE CONSECUTIVE ASSISTANT CONTENT ##
|
||||
while msg_i < len(messages) and messages[msg_i]["role"] == "assistant":
|
||||
msg_content = messages[msg_i].get("content")
|
||||
if msg_content:
|
||||
if isinstance(msg_content, list):
|
||||
for m in msg_content:
|
||||
if m.get("type", "") == "text":
|
||||
assistant_content_str += m["text"]
|
||||
elif isinstance(msg_content, str):
|
||||
# Tool message content will always be a string
|
||||
assistant_content_str += msg_content
|
||||
|
||||
tool_calls = messages[msg_i].get("tool_calls")
|
||||
ollama_tool_calls = []
|
||||
if tool_calls:
|
||||
for call in tool_calls:
|
||||
call_id: str = call["id"]
|
||||
function_name: str = call["function"]["name"]
|
||||
arguments = json.loads(call["function"]["arguments"])
|
||||
|
||||
tool_calls.append(
|
||||
ollama_tool_calls.append(
|
||||
{
|
||||
"id": call_id,
|
||||
"type": "function",
|
||||
"function": {"name": function_name, "arguments": arguments},
|
||||
"function": {
|
||||
"name": function_name,
|
||||
"arguments": arguments,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
prompt += f"### Assistant:\nTool Calls: {json.dumps(tool_calls, indent=2)}\n\n"
|
||||
if ollama_tool_calls:
|
||||
assistant_content_str += (
|
||||
f"Tool Calls: {json.dumps(ollama_tool_calls, indent=2)}"
|
||||
)
|
||||
|
||||
elif "tool_call_id" in message:
|
||||
prompt += f"### User:\n{message['content']}\n\n"
|
||||
msg_i += 1
|
||||
|
||||
elif content:
|
||||
prompt += f"### {role.capitalize()}:\n{content}\n\n"
|
||||
if assistant_content_str:
|
||||
prompt += f"### Assistant:\n{assistant_content_str}\n\n"
|
||||
|
||||
if msg_i == init_msg_i: # prevent infinite loops
|
||||
raise litellm.BadRequestError(
|
||||
message=BAD_MESSAGE_ERROR_STR + f"passed in {messages[msg_i]}",
|
||||
model=model,
|
||||
llm_provider="ollama",
|
||||
)
|
||||
# prompt = ""
|
||||
# images = []
|
||||
# for message in messages:
|
||||
# if isinstance(message["content"], str):
|
||||
# prompt += message["content"]
|
||||
# elif isinstance(message["content"], list):
|
||||
# # see https://docs.litellm.ai/docs/providers/openai#openai-vision-models
|
||||
# for element in message["content"]:
|
||||
# if isinstance(element, dict):
|
||||
# if element["type"] == "text":
|
||||
# prompt += element["text"]
|
||||
# elif element["type"] == "image_url":
|
||||
# base64_image = convert_to_ollama_image(
|
||||
# element["image_url"]["url"]
|
||||
# )
|
||||
# images.append(base64_image)
|
||||
|
||||
# if "tool_calls" in message:
|
||||
# tool_calls = []
|
||||
|
||||
# for call in message["tool_calls"]:
|
||||
# call_id: str = call["id"]
|
||||
# function_name: str = call["function"]["name"]
|
||||
# arguments = json.loads(call["function"]["arguments"])
|
||||
|
||||
# tool_calls.append(
|
||||
# {
|
||||
# "id": call_id,
|
||||
# "type": "function",
|
||||
# "function": {"name": function_name, "arguments": arguments},
|
||||
# }
|
||||
# )
|
||||
|
||||
# prompt += f"### Assistant:\nTool Calls: {json.dumps(tool_calls, indent=2)}\n\n"
|
||||
|
||||
# elif "tool_call_id" in message:
|
||||
# prompt += f"### User:\n{message['content']}\n\n"
|
||||
|
||||
return {"prompt": prompt, "images": images}
|
||||
|
||||
return prompt
|
||||
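To make the merged-role loop above concrete, here is a runnable sketch of the prompt shape it produces for a small text-only conversation (string content only; images and tool calls omitted):

```python
# Consecutive user/tool/function messages collapse into one "### User:" section,
# consecutive assistant messages into one "### Assistant:" section.
messages = [
    {"role": "user", "content": "What is 2+2?"},
    {"role": "assistant", "content": "4"},
    {"role": "user", "content": "And 3+3?"},
]

prompt = ""
i = 0
while i < len(messages):
    user_parts, assistant_parts = [], []
    while i < len(messages) and messages[i]["role"] in {"user", "tool", "function"}:
        user_parts.append(messages[i]["content"])
        i += 1
    if user_parts:
        prompt += "### User:\n" + "".join(user_parts) + "\n\n"
    while i < len(messages) and messages[i]["role"] == "assistant":
        assistant_parts.append(messages[i]["content"])
        i += 1
    if assistant_parts:
        prompt += "### Assistant:\n" + "".join(assistant_parts) + "\n\n"
    if not user_parts and not assistant_parts:
        # mirrors the BadRequestError guard against infinite loops above
        raise ValueError(f"unsupported role in message: {messages[i]}")

print(prompt)
```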
|
||||
|
@ -680,12 +752,13 @@ def convert_generic_image_chunk_to_openai_image_obj(
|
|||
Return:
|
||||
"data:image/jpeg;base64,{base64_image}"
|
||||
"""
|
||||
return "data:{};{},{}".format(
|
||||
image_chunk["media_type"], image_chunk["type"], image_chunk["data"]
|
||||
)
|
||||
media_type = image_chunk["media_type"]
|
||||
return "data:{};{},{}".format(media_type, image_chunk["type"], image_chunk["data"])
|
||||
|
||||
|
||||
def convert_to_anthropic_image_obj(openai_image_url: str) -> GenericImageParsingChunk:
|
||||
def convert_to_anthropic_image_obj(
|
||||
openai_image_url: str, format: Optional[str]
|
||||
) -> GenericImageParsingChunk:
|
||||
"""
|
||||
Input:
|
||||
"image_url": "data:image/jpeg;base64,{base64_image}",
|
||||
|
@ -702,6 +775,10 @@ def convert_to_anthropic_image_obj(openai_image_url: str) -> GenericImageParsing
|
|||
openai_image_url = convert_url_to_base64(url=openai_image_url)
|
||||
# Extract the media type and base64 data
|
||||
media_type, base64_data = openai_image_url.split("data:")[1].split(";base64,")
|
||||
|
||||
if format:
|
||||
media_type = format
|
||||
else:
|
||||
media_type = media_type.replace("\\/", "/")
|
||||
|
||||
return GenericImageParsingChunk(
|
||||
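The media-type handling above boils down to splitting the data URL and letting an explicit `format` override the embedded media type. A small sketch of just that split:

```python
# `format` mirrors the parameter name in the diff (it shadows the builtin).
def sketch_split_data_url(openai_image_url: str, format=None):
    media_type, base64_data = openai_image_url.split("data:")[1].split(";base64,")
    media_type = format if format else media_type.replace("\\/", "/")
    return media_type, base64_data


print(sketch_split_data_url("data:image/jpeg;base64,AAAA"))                      # ('image/jpeg', 'AAAA')
print(sketch_split_data_url("data:image/jpeg;base64,AAAA", format="image/png"))  # ('image/png', 'AAAA')
```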
|
@ -820,11 +897,12 @@ def anthropic_messages_pt_xml(messages: list):
|
|||
if isinstance(messages[msg_i]["content"], list):
|
||||
for m in messages[msg_i]["content"]:
|
||||
if m.get("type", "") == "image_url":
|
||||
format = m["image_url"].get("format")
|
||||
user_content.append(
|
||||
{
|
||||
"type": "image",
|
||||
"source": convert_to_anthropic_image_obj(
|
||||
m["image_url"]["url"]
|
||||
m["image_url"]["url"], format=format
|
||||
),
|
||||
}
|
||||
)
|
||||
|
@ -1156,10 +1234,13 @@ def convert_to_anthropic_tool_result(
|
|||
)
|
||||
elif content["type"] == "image_url":
|
||||
if isinstance(content["image_url"], str):
|
||||
image_chunk = convert_to_anthropic_image_obj(content["image_url"])
|
||||
else:
|
||||
image_chunk = convert_to_anthropic_image_obj(
|
||||
content["image_url"]["url"]
|
||||
content["image_url"], format=None
|
||||
)
|
||||
else:
|
||||
format = content["image_url"].get("format")
|
||||
image_chunk = convert_to_anthropic_image_obj(
|
||||
content["image_url"]["url"], format=format
|
||||
)
|
||||
anthropic_content_list.append(
|
||||
AnthropicMessagesImageParam(
|
||||
|
@ -1282,6 +1363,7 @@ def add_cache_control_to_content(
|
|||
AnthropicMessagesImageParam,
|
||||
AnthropicMessagesTextParam,
|
||||
AnthropicMessagesDocumentParam,
|
||||
ChatCompletionThinkingBlock,
|
||||
],
|
||||
orignal_content_element: Union[dict, AllMessageValues],
|
||||
):
|
||||
|
@ -1317,6 +1399,7 @@ def _anthropic_content_element_factory(
|
|||
data=image_chunk["data"],
|
||||
),
|
||||
)
|
||||
|
||||
return _anthropic_content_element
|
||||
|
||||
|
||||
|
@ -1368,13 +1451,16 @@ def anthropic_messages_pt( # noqa: PLR0915
|
|||
for m in user_message_types_block["content"]:
|
||||
if m.get("type", "") == "image_url":
|
||||
m = cast(ChatCompletionImageObject, m)
|
||||
format: Optional[str] = None
|
||||
if isinstance(m["image_url"], str):
|
||||
image_chunk = convert_to_anthropic_image_obj(
|
||||
openai_image_url=m["image_url"]
|
||||
openai_image_url=m["image_url"], format=None
|
||||
)
|
||||
else:
|
||||
format = m["image_url"].get("format")
|
||||
image_chunk = convert_to_anthropic_image_obj(
|
||||
openai_image_url=m["image_url"]["url"]
|
||||
openai_image_url=m["image_url"]["url"],
|
||||
format=format,
|
||||
)
|
||||
|
||||
_anthropic_content_element = (
|
||||
|
@ -1454,12 +1540,23 @@ def anthropic_messages_pt( # noqa: PLR0915
|
|||
assistant_content_block["content"], list
|
||||
):
|
||||
for m in assistant_content_block["content"]:
|
||||
# handle text
|
||||
# handle thinking blocks
|
||||
thinking_block = cast(str, m.get("thinking", ""))
|
||||
text_block = cast(str, m.get("text", ""))
|
||||
if (
|
||||
m.get("type", "") == "text" and len(m.get("text", "")) > 0
|
||||
m.get("type", "") == "thinking" and len(thinking_block) > 0
|
||||
): # don't pass empty text blocks. anthropic api raises errors.
|
||||
anthropic_message: Union[
|
||||
ChatCompletionThinkingBlock,
|
||||
AnthropicMessagesTextParam,
|
||||
] = cast(ChatCompletionThinkingBlock, m)
|
||||
assistant_content.append(anthropic_message)
|
||||
# handle text
|
||||
elif (
|
||||
m.get("type", "") == "text" and len(text_block) > 0
|
||||
): # don't pass empty text blocks. anthropic api raises errors.
|
||||
anthropic_message = AnthropicMessagesTextParam(
|
||||
type="text", text=m.get("text")
|
||||
type="text", text=text_block
|
||||
)
|
||||
_cached_message = add_cache_control_to_content(
|
||||
anthropic_content_element=anthropic_message,
|
||||
|
@ -1512,6 +1609,7 @@ def anthropic_messages_pt( # noqa: PLR0915
|
|||
msg_i += 1
|
||||
|
||||
if assistant_content:
|
||||
|
||||
new_messages.append({"role": "assistant", "content": assistant_content})
|
||||
|
||||
if msg_i == init_msg_i: # prevent infinite loops
|
||||
|
@ -1520,17 +1618,6 @@ def anthropic_messages_pt( # noqa: PLR0915
|
|||
model=model,
|
||||
llm_provider=llm_provider,
|
||||
)
|
||||
if not new_messages or new_messages[0]["role"] != "user":
|
||||
if litellm.modify_params:
|
||||
new_messages.insert(
|
||||
0, {"role": "user", "content": [{"type": "text", "text": "."}]}
|
||||
)
|
||||
else:
|
||||
raise Exception(
|
||||
"Invalid first message={}. Should always start with 'role'='user' for Anthropic. System prompt is sent separately for Anthropic. set 'litellm.modify_params = True' or 'litellm_settings:modify_params = True' on proxy, to insert a placeholder user message - '.' as the first message, ".format(
|
||||
new_messages
|
||||
)
|
||||
)
|
||||
|
||||
if new_messages[-1]["role"] == "assistant":
|
||||
if isinstance(new_messages[-1]["content"], str):
|
||||
|
@ -2301,8 +2388,11 @@ class BedrockImageProcessor:
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def process_image_sync(cls, image_url: str) -> BedrockContentBlock:
|
||||
def process_image_sync(
|
||||
cls, image_url: str, format: Optional[str] = None
|
||||
) -> BedrockContentBlock:
|
||||
"""Synchronous image processing."""
|
||||
|
||||
if "base64" in image_url:
|
||||
img_bytes, mime_type, image_format = cls._parse_base64_image(image_url)
|
||||
elif "http://" in image_url or "https://" in image_url:
|
||||
|
@ -2313,11 +2403,17 @@ class BedrockImageProcessor:
|
|||
"Unsupported image type. Expected either image url or base64 encoded string"
|
||||
)
|
||||
|
||||
if format:
|
||||
mime_type = format
|
||||
image_format = mime_type.split("/")[1]
|
||||
|
||||
image_format = cls._validate_format(mime_type, image_format)
|
||||
return cls._create_bedrock_block(img_bytes, mime_type, image_format)
|
||||
|
||||
@classmethod
|
||||
async def process_image_async(cls, image_url: str) -> BedrockContentBlock:
|
||||
async def process_image_async(
|
||||
cls, image_url: str, format: Optional[str]
|
||||
) -> BedrockContentBlock:
|
||||
"""Asynchronous image processing."""
|
||||
|
||||
if "base64" in image_url:
|
||||
|
@ -2332,6 +2428,10 @@ class BedrockImageProcessor:
|
|||
"Unsupported image type. Expected either image url or base64 encoded string"
|
||||
)
|
||||
|
||||
if format: # override with user-defined params
|
||||
mime_type = format
|
||||
image_format = mime_type.split("/")[1]
|
||||
|
||||
image_format = cls._validate_format(mime_type, image_format)
|
||||
return cls._create_bedrock_block(img_bytes, mime_type, image_format)
|
||||
|
||||
|
@ -2819,12 +2919,14 @@ class BedrockConverseMessagesProcessor:
|
|||
_part = BedrockContentBlock(text=element["text"])
|
||||
_parts.append(_part)
|
||||
elif element["type"] == "image_url":
|
||||
format: Optional[str] = None
|
||||
if isinstance(element["image_url"], dict):
|
||||
image_url = element["image_url"]["url"]
|
||||
format = element["image_url"].get("format")
|
||||
else:
|
||||
image_url = element["image_url"]
|
||||
_part = await BedrockImageProcessor.process_image_async( # type: ignore
|
||||
image_url=image_url
|
||||
image_url=image_url, format=format
|
||||
)
|
||||
_parts.append(_part) # type: ignore
|
||||
_cache_point_block = (
|
||||
|
@ -2924,7 +3026,14 @@ class BedrockConverseMessagesProcessor:
|
|||
assistants_parts: List[BedrockContentBlock] = []
|
||||
for element in _assistant_content:
|
||||
if isinstance(element, dict):
|
||||
if element["type"] == "text":
|
||||
if element["type"] == "thinking":
|
||||
thinking_block = BedrockConverseMessagesProcessor.translate_thinking_blocks_to_reasoning_content_blocks(
|
||||
thinking_blocks=[
|
||||
cast(ChatCompletionThinkingBlock, element)
|
||||
]
|
||||
)
|
||||
assistants_parts.extend(thinking_block)
|
||||
elif element["type"] == "text":
|
||||
assistants_part = BedrockContentBlock(
|
||||
text=element["text"]
|
||||
)
|
||||
|
@ -2974,7 +3083,7 @@ class BedrockConverseMessagesProcessor:
|
|||
reasoning_content_blocks: List[BedrockContentBlock] = []
|
||||
for thinking_block in thinking_blocks:
|
||||
reasoning_text = thinking_block.get("thinking")
|
||||
reasoning_signature = thinking_block.get("signature_delta")
|
||||
reasoning_signature = thinking_block.get("signature")
|
||||
text_block = BedrockConverseReasoningTextBlock(
|
||||
text=reasoning_text or "",
|
||||
)
|
||||
|
@ -3050,12 +3159,15 @@ def _bedrock_converse_messages_pt( # noqa: PLR0915
|
|||
_part = BedrockContentBlock(text=element["text"])
|
||||
_parts.append(_part)
|
||||
elif element["type"] == "image_url":
|
||||
format: Optional[str] = None
|
||||
if isinstance(element["image_url"], dict):
|
||||
image_url = element["image_url"]["url"]
|
||||
format = element["image_url"].get("format")
|
||||
else:
|
||||
image_url = element["image_url"]
|
||||
_part = BedrockImageProcessor.process_image_sync( # type: ignore
|
||||
image_url=image_url
|
||||
image_url=image_url,
|
||||
format=format,
|
||||
)
|
||||
_parts.append(_part) # type: ignore
|
||||
_cache_point_block = (
|
||||
|
@ -3157,7 +3269,14 @@ def _bedrock_converse_messages_pt( # noqa: PLR0915
|
|||
assistants_parts: List[BedrockContentBlock] = []
|
||||
for element in _assistant_content:
|
||||
if isinstance(element, dict):
|
||||
if element["type"] == "text":
|
||||
if element["type"] == "thinking":
|
||||
thinking_block = BedrockConverseMessagesProcessor.translate_thinking_blocks_to_reasoning_content_blocks(
|
||||
thinking_blocks=[
|
||||
cast(ChatCompletionThinkingBlock, element)
|
||||
]
|
||||
)
|
||||
assistants_parts.extend(thinking_block)
|
||||
elif element["type"] == "text":
|
||||
assistants_part = BedrockContentBlock(text=element["text"])
|
||||
assistants_parts.append(assistants_part)
|
||||
elif element["type"] == "image_url":
|
||||
|
|
|
@ -15,6 +15,7 @@ from litellm import verbose_logger
|
|||
from litellm.litellm_core_utils.redact_messages import LiteLLMLoggingObject
|
||||
from litellm.litellm_core_utils.thread_pool_executor import executor
|
||||
from litellm.types.llms.openai import ChatCompletionChunk
|
||||
from litellm.types.router import GenericLiteLLMParams
|
||||
from litellm.types.utils import Delta
|
||||
from litellm.types.utils import GenericStreamingChunk as GChunk
|
||||
from litellm.types.utils import (
|
||||
|
@ -70,6 +71,17 @@ class CustomStreamWrapper:
|
|||
self.completion_stream = completion_stream
|
||||
self.sent_first_chunk = False
|
||||
self.sent_last_chunk = False
|
||||
|
||||
litellm_params: GenericLiteLLMParams = GenericLiteLLMParams(
|
||||
**self.logging_obj.model_call_details.get("litellm_params", {})
|
||||
)
|
||||
self.merge_reasoning_content_in_choices: bool = (
|
||||
litellm_params.merge_reasoning_content_in_choices or False
|
||||
)
|
||||
self.sent_first_thinking_block = False
|
||||
self.sent_last_thinking_block = False
|
||||
self.thinking_content = ""
|
||||
|
||||
self.system_fingerprint: Optional[str] = None
|
||||
self.received_finish_reason: Optional[str] = None
|
||||
self.intermittent_finish_reason: Optional[str] = (
|
||||
|
@ -87,12 +99,7 @@ class CustomStreamWrapper:
|
|||
self.holding_chunk = ""
|
||||
self.complete_response = ""
|
||||
self.response_uptil_now = ""
|
||||
_model_info = (
|
||||
self.logging_obj.model_call_details.get("litellm_params", {}).get(
|
||||
"model_info", {}
|
||||
)
|
||||
or {}
|
||||
)
|
||||
_model_info: Dict = litellm_params.model_info or {}
|
||||
|
||||
_api_base = get_api_base(
|
||||
model=model or "",
|
||||
|
@ -630,7 +637,10 @@ class CustomStreamWrapper:
|
|||
if isinstance(chunk, bytes):
|
||||
chunk = chunk.decode("utf-8")
|
||||
if "text_output" in chunk:
|
||||
response = chunk.replace("data: ", "").strip()
|
||||
response = (
|
||||
CustomStreamWrapper._strip_sse_data_from_chunk(chunk) or ""
|
||||
)
|
||||
response = response.strip()
|
||||
parsed_response = json.loads(response)
|
||||
else:
|
||||
return {
|
||||
|
@ -755,16 +765,12 @@ class CustomStreamWrapper:
|
|||
setattr(model_response, k, v)
|
||||
return model_response
|
||||
|
||||
def return_processed_chunk_logic( # noqa
|
||||
def is_chunk_non_empty(
|
||||
self,
|
||||
completion_obj: Dict[str, Any],
|
||||
model_response: ModelResponseStream,
|
||||
response_obj: Dict[str, Any],
|
||||
):
|
||||
|
||||
print_verbose(
|
||||
f"completion_obj: {completion_obj}, model_response.choices[0]: {model_response.choices[0]}, response_obj: {response_obj}"
|
||||
)
|
||||
) -> bool:
|
||||
if (
|
||||
"content" in completion_obj
|
||||
and (
|
||||
|
@ -780,6 +786,10 @@ class CustomStreamWrapper:
|
|||
"function_call" in completion_obj
|
||||
and completion_obj["function_call"] is not None
|
||||
)
|
||||
or (
|
||||
"reasoning_content" in model_response.choices[0].delta
|
||||
and model_response.choices[0].delta.reasoning_content is not None
|
||||
)
|
||||
or (model_response.choices[0].delta.provider_specific_fields is not None)
|
||||
or (
|
||||
"provider_specific_fields" in model_response
|
||||
|
@ -789,8 +799,27 @@ class CustomStreamWrapper:
|
|||
"provider_specific_fields" in response_obj
|
||||
and response_obj["provider_specific_fields"] is not None
|
||||
)
|
||||
): # cannot set content of an OpenAI Object to be an empty string
|
||||
):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def return_processed_chunk_logic( # noqa
|
||||
self,
|
||||
completion_obj: Dict[str, Any],
|
||||
model_response: ModelResponseStream,
|
||||
response_obj: Dict[str, Any],
|
||||
):
|
||||
|
||||
print_verbose(
|
||||
f"completion_obj: {completion_obj}, model_response.choices[0]: {model_response.choices[0]}, response_obj: {response_obj}"
|
||||
)
|
||||
is_chunk_non_empty = self.is_chunk_non_empty(
|
||||
completion_obj, model_response, response_obj
|
||||
)
|
||||
if (
|
||||
is_chunk_non_empty
|
||||
): # cannot set content of an OpenAI Object to be an empty string
|
||||
self.safety_checker()
|
||||
hold, model_response_str = self.check_special_tokens(
|
||||
chunk=completion_obj["content"],
|
||||
|
@ -806,7 +835,7 @@ class CustomStreamWrapper:
|
|||
for choice in original_chunk.choices:
|
||||
try:
|
||||
if isinstance(choice, BaseModel):
|
||||
choice_json = choice.model_dump()
|
||||
choice_json = choice.model_dump() # type: ignore
|
||||
choice_json.pop(
|
||||
"finish_reason", None
|
||||
) # for mistral etc. which return a value in their last chunk (not-openai compatible).
|
||||
|
@ -854,6 +883,10 @@ class CustomStreamWrapper:
|
|||
_index: Optional[int] = completion_obj.get("index")
|
||||
if _index is not None:
|
||||
model_response.choices[0].index = _index
|
||||
|
||||
self._optional_combine_thinking_block_in_choices(
|
||||
model_response=model_response
|
||||
)
|
||||
print_verbose(f"returning model_response: {model_response}")
|
||||
return model_response
|
||||
else:
|
||||
|
@ -910,6 +943,48 @@ class CustomStreamWrapper:
|
|||
self.chunks.append(model_response)
|
||||
return
|
||||
|
||||
def _optional_combine_thinking_block_in_choices(
|
||||
self, model_response: ModelResponseStream
|
||||
) -> None:
|
||||
"""
|
||||
UIs like OpenWebUI expect to receive a single chunk with <think>...</think> tags in the chunk content
|
||||
|
||||
Updates the model_response object in place, wrapping reasoning_content in <think>...</think> tags inside the content
|
||||
|
||||
Enabled when `merge_reasoning_content_in_choices=True` is passed in the request params
|
||||
|
||||
|
||||
"""
|
||||
if self.merge_reasoning_content_in_choices is True:
|
||||
reasoning_content = getattr(
|
||||
model_response.choices[0].delta, "reasoning_content", None
|
||||
)
|
||||
if reasoning_content:
|
||||
if self.sent_first_thinking_block is False:
|
||||
model_response.choices[0].delta.content += (
|
||||
"<think>" + reasoning_content
|
||||
)
|
||||
self.sent_first_thinking_block = True
|
||||
elif (
|
||||
self.sent_first_thinking_block is True
|
||||
and hasattr(model_response.choices[0].delta, "reasoning_content")
|
||||
and model_response.choices[0].delta.reasoning_content
|
||||
):
|
||||
model_response.choices[0].delta.content = reasoning_content
|
||||
elif (
|
||||
self.sent_first_thinking_block is True
|
||||
and not self.sent_last_thinking_block
|
||||
and model_response.choices[0].delta.content
|
||||
):
|
||||
model_response.choices[0].delta.content = (
|
||||
"</think>" + model_response.choices[0].delta.content
|
||||
)
|
||||
self.sent_last_thinking_block = True
|
||||
|
||||
if hasattr(model_response.choices[0].delta, "reasoning_content"):
|
||||
del model_response.choices[0].delta.reasoning_content
|
||||
return
|
||||
|
||||
def chunk_creator(self, chunk: Any): # type: ignore # noqa: PLR0915
|
||||
model_response = self.model_response_creator()
|
||||
response_obj: Dict[str, Any] = {}
|
||||
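The <think>-merging behaviour above is easiest to see as a tiny state machine over the stream: the first reasoning delta opens a `<think>` block, later reasoning deltas pass through, and the first regular content delta closes it. A standalone sketch:

```python
# Illustrative reduction of the merge logic; chunk kinds are simplified to
# ("reasoning", text) and ("content", text) tuples.
def sketch_merge_reasoning(chunks):
    out, opened, closed = [], False, False
    for kind, text in chunks:
        if kind == "reasoning":
            out.append(text if opened else "<think>" + text)
            opened = True
        elif opened and not closed:
            out.append("</think>" + text)
            closed = True
        else:
            out.append(text)
    return "".join(out)


print(sketch_merge_reasoning(
    [("reasoning", "step 1"), ("reasoning", " step 2"), ("content", "Answer: 42")]
))
# <think>step 1 step 2</think>Answer: 42
```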
|
@ -1756,6 +1831,42 @@ class CustomStreamWrapper:
|
|||
extra_kwargs={},
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _strip_sse_data_from_chunk(chunk: Optional[str]) -> Optional[str]:
|
||||
"""
|
||||
Strips the 'data: ' prefix from Server-Sent Events (SSE) chunks.
|
||||
|
||||
Some providers, like SageMaker, send it as `data:`; we need to handle both
|
||||
|
||||
SSE messages are prefixed with 'data: ' which is part of the protocol,
|
||||
not the actual content from the LLM. This method removes that prefix
|
||||
and returns the actual content.
|
||||
|
||||
Args:
|
||||
chunk: The SSE chunk that may contain the 'data: ' prefix, or None
|
||||
|
||||
Returns:
|
||||
The chunk with the 'data: ' prefix removed, or the original chunk
|
||||
if no prefix was found. Returns None if input is None.
|
||||
|
||||
See OpenAI Python Ref for this: https://github.com/openai/openai-python/blob/041bf5a8ec54da19aad0169671793c2078bd6173/openai/api_requestor.py#L100
|
||||
"""
|
||||
if chunk is None:
|
||||
return None
|
||||
|
||||
if isinstance(chunk, str):
|
||||
# OpenAI sends `data: `
|
||||
if chunk.startswith("data: "):
|
||||
# Strip the prefix and any leading whitespace that might follow it
|
||||
_length_of_sse_data_prefix = len("data: ")
|
||||
return chunk[_length_of_sse_data_prefix:]
|
||||
elif chunk.startswith("data:"):
|
||||
# Sagemaker sends `data:`, no trailing whitespace
|
||||
_length_of_sse_data_prefix = len("data:")
|
||||
return chunk[_length_of_sse_data_prefix:]
|
||||
|
||||
return chunk
|
||||
|
||||
|
||||
def calculate_total_usage(chunks: List[ModelResponse]) -> Usage:
|
||||
"""Assume most recent usage chunk has total usage uptil then."""
|
||||
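As a quick check on the stripping rule documented above, here is a standalone reproduction covering the three interesting cases (OpenAI-style `data: `, SageMaker-style `data:`, and a chunk with no prefix):

```python
def strip_sse_prefix(chunk):
    if chunk is None:
        return None
    if isinstance(chunk, str):
        if chunk.startswith("data: "):   # OpenAI-style prefix
            return chunk[len("data: "):]
        if chunk.startswith("data:"):    # SageMaker-style prefix, no space
            return chunk[len("data:"):]
    return chunk


assert strip_sse_prefix('data: {"text_output": "hi"}') == '{"text_output": "hi"}'
assert strip_sse_prefix('data:{"text_output": "hi"}') == '{"text_output": "hi"}'
assert strip_sse_prefix("[DONE]") == "[DONE]"
assert strip_sse_prefix(None) is None
```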
|
|
|
@@ -474,7 +474,10 @@ class ModelResponseIterator:
        if len(self.content_blocks) == 0:
            return False

        if self.content_blocks[0]["delta"]["type"] == "text_delta":
        if (
            self.content_blocks[0]["delta"]["type"] == "text_delta"
            or self.content_blocks[0]["delta"]["type"] == "thinking_delta"
        ):
            return False

        for block in self.content_blocks:

@@ -527,6 +530,7 @@ class ModelResponseIterator:
        provider_specific_fields = {}
        content_block = ContentBlockDelta(**chunk)  # type: ignore
        thinking_blocks: List[ChatCompletionThinkingBlock] = []

        self.content_blocks.append(content_block)
        if "text" in content_block["delta"]:
            text = content_block["delta"]["text"]

@@ -544,13 +548,13 @@ class ModelResponseIterator:
            provider_specific_fields["citation"] = content_block["delta"]["citation"]
        elif (
            "thinking" in content_block["delta"]
            or "signature_delta" == content_block["delta"]
            or "signature" in content_block["delta"]
        ):
            thinking_blocks = [
                ChatCompletionThinkingBlock(
                    type="thinking",
                    thinking=content_block["delta"].get("thinking"),
                    signature_delta=content_block["delta"].get("signature"),
                    thinking=content_block["delta"].get("thinking") or "",
                    signature=content_block["delta"].get("signature"),
                )
            ]
            provider_specific_fields["thinking_blocks"] = thinking_blocks

@@ -616,9 +620,11 @@ class ModelResponseIterator:
                    "index": self.tool_index,
                }
            elif type_chunk == "content_block_stop":

                ContentBlockStop(**chunk)  # type: ignore
                # check if tool call content block
                is_empty = self.check_empty_tool_call_args()

                if is_empty:
                    tool_use = {
                        "id": None,
@@ -0,0 +1,179 @@
"""
- call /messages on Anthropic API
- Make streaming + non-streaming request - just pass it through direct to Anthropic. No need to do anything special here
- Ensure requests are logged in the DB - stream + non-stream

"""

import json
from typing import Any, AsyncIterator, Dict, Optional, Union, cast

import httpx

import litellm
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.anthropic_messages.transformation import (
    BaseAnthropicMessagesConfig,
)
from litellm.llms.custom_httpx.http_handler import (
    AsyncHTTPHandler,
    get_async_httpx_client,
)
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import ProviderSpecificHeader
from litellm.utils import ProviderConfigManager, client


class AnthropicMessagesHandler:

    @staticmethod
    async def _handle_anthropic_streaming(
        response: httpx.Response,
        request_body: dict,
        litellm_logging_obj: LiteLLMLoggingObj,
    ) -> AsyncIterator:
        """Helper function to handle Anthropic streaming responses using the existing logging handlers"""
        from datetime import datetime

        from litellm.proxy.pass_through_endpoints.streaming_handler import (
            PassThroughStreamingHandler,
        )
        from litellm.proxy.pass_through_endpoints.success_handler import (
            PassThroughEndpointLogging,
        )
        from litellm.proxy.pass_through_endpoints.types import EndpointType

        # Create success handler object
        passthrough_success_handler_obj = PassThroughEndpointLogging()

        # Use the existing streaming handler for Anthropic
        start_time = datetime.now()
        return PassThroughStreamingHandler.chunk_processor(
            response=response,
            request_body=request_body,
            litellm_logging_obj=litellm_logging_obj,
            endpoint_type=EndpointType.ANTHROPIC,
            start_time=start_time,
            passthrough_success_handler_obj=passthrough_success_handler_obj,
            url_route="/v1/messages",
        )


@client
async def anthropic_messages(
    api_key: str,
    model: str,
    stream: bool = False,
    api_base: Optional[str] = None,
    client: Optional[AsyncHTTPHandler] = None,
    custom_llm_provider: Optional[str] = None,
    **kwargs,
) -> Union[Dict[str, Any], AsyncIterator]:
    """
    Makes Anthropic `/v1/messages` API calls in the Anthropic API spec
    """
    # Use provided client or create a new one
    optional_params = GenericLiteLLMParams(**kwargs)
    model, _custom_llm_provider, dynamic_api_key, dynamic_api_base = (
        litellm.get_llm_provider(
            model=model,
            custom_llm_provider=custom_llm_provider,
            api_base=optional_params.api_base,
            api_key=optional_params.api_key,
        )
    )
    anthropic_messages_provider_config: Optional[BaseAnthropicMessagesConfig] = (
        ProviderConfigManager.get_provider_anthropic_messages_config(
            model=model,
            provider=litellm.LlmProviders(_custom_llm_provider),
        )
    )
    if anthropic_messages_provider_config is None:
        raise ValueError(
            f"Anthropic messages provider config not found for model: {model}"
        )
    if client is None or not isinstance(client, AsyncHTTPHandler):
        async_httpx_client = get_async_httpx_client(
            llm_provider=litellm.LlmProviders.ANTHROPIC
        )
    else:
        async_httpx_client = client

    litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj", None)

    # Prepare headers
    provider_specific_header = cast(
        Optional[ProviderSpecificHeader], kwargs.get("provider_specific_header", None)
    )
    extra_headers = (
        provider_specific_header.get("extra_headers", {})
        if provider_specific_header
        else {}
    )
    headers = anthropic_messages_provider_config.validate_environment(
        headers=extra_headers or {},
        model=model,
        api_key=api_key,
    )

    litellm_logging_obj.update_environment_variables(
        model=model,
        optional_params=dict(optional_params),
        litellm_params={
            "metadata": kwargs.get("metadata", {}),
            "preset_cache_key": None,
            "stream_response": {},
            **optional_params.model_dump(exclude_unset=True),
        },
        custom_llm_provider=_custom_llm_provider,
    )
    litellm_logging_obj.model_call_details.update(kwargs)

    # Prepare request body
    request_body = kwargs.copy()
    request_body = {
        k: v
        for k, v in request_body.items()
        if k
        in anthropic_messages_provider_config.get_supported_anthropic_messages_params(
            model=model
        )
    }
    request_body["stream"] = stream
    request_body["model"] = model
    litellm_logging_obj.stream = stream

    # Make the request
    request_url = anthropic_messages_provider_config.get_complete_url(
        api_base=api_base, model=model
    )

    litellm_logging_obj.pre_call(
        input=[{"role": "user", "content": json.dumps(request_body)}],
        api_key="",
        additional_args={
            "complete_input_dict": request_body,
            "api_base": str(request_url),
            "headers": headers,
        },
    )

    response = await async_httpx_client.post(
        url=request_url,
        headers=headers,
        data=json.dumps(request_body),
        stream=stream,
    )
    response.raise_for_status()

    # used for logging + cost tracking
    litellm_logging_obj.model_call_details["httpx_response"] = response

    if stream:
        return await AnthropicMessagesHandler._handle_anthropic_streaming(
            response=response,
            request_body=request_body,
            litellm_logging_obj=litellm_logging_obj,
        )
    else:
        return response.json()
@@ -0,0 +1,47 @@
from typing import Optional

from litellm.llms.base_llm.anthropic_messages.transformation import (
    BaseAnthropicMessagesConfig,
)

DEFAULT_ANTHROPIC_API_BASE = "https://api.anthropic.com"
DEFAULT_ANTHROPIC_API_VERSION = "2023-06-01"


class AnthropicMessagesConfig(BaseAnthropicMessagesConfig):
    def get_supported_anthropic_messages_params(self, model: str) -> list:
        return [
            "messages",
            "model",
            "system",
            "max_tokens",
            "stop_sequences",
            "temperature",
            "top_p",
            "top_k",
            "tools",
            "tool_choice",
            "thinking",
            # TODO: Add Anthropic `metadata` support
            # "metadata",
        ]

    def get_complete_url(self, api_base: Optional[str], model: str) -> str:
        api_base = api_base or DEFAULT_ANTHROPIC_API_BASE
        if not api_base.endswith("/v1/messages"):
            api_base = f"{api_base}/v1/messages"
        return api_base

    def validate_environment(
        self,
        headers: dict,
        model: str,
        api_key: Optional[str] = None,
    ) -> dict:
        if "x-api-key" not in headers:
            headers["x-api-key"] = api_key
        if "anthropic-version" not in headers:
            headers["anthropic-version"] = DEFAULT_ANTHROPIC_API_VERSION
        if "content-type" not in headers:
            headers["content-type"] = "application/json"
        return headers
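As a quick illustration of the config above, a minimal sketch (not part of the diff) of the URL and headers it would produce. The module path in the import is an assumption based on the handler's location, and the API key is a placeholder.

```python
# assumed module path, mirroring the handler's experimental_pass_through package
from litellm.llms.anthropic.experimental_pass_through.messages.transformation import (
    AnthropicMessagesConfig,
)

config = AnthropicMessagesConfig()

# Default base is expanded to the full /v1/messages endpoint
print(config.get_complete_url(api_base=None, model="claude-3-7-sonnet-latest"))
# -> https://api.anthropic.com/v1/messages

# Missing auth/version/content-type headers are filled in
print(config.validate_environment(headers={}, model="claude-3-7-sonnet-latest", api_key="sk-ant-..."))
# -> {'x-api-key': 'sk-ant-...', 'anthropic-version': '2023-06-01', 'content-type': 'application/json'}
```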
@ -1,412 +0,0 @@
|
|||
import json
|
||||
from typing import List, Literal, Optional, Tuple, Union
|
||||
|
||||
from openai.types.chat.chat_completion_chunk import Choice as OpenAIStreamingChoice
|
||||
|
||||
from litellm.types.llms.anthropic import (
|
||||
AllAnthropicToolsValues,
|
||||
AnthopicMessagesAssistantMessageParam,
|
||||
AnthropicFinishReason,
|
||||
AnthropicMessagesRequest,
|
||||
AnthropicMessagesToolChoice,
|
||||
AnthropicMessagesUserMessageParam,
|
||||
AnthropicResponse,
|
||||
AnthropicResponseContentBlockText,
|
||||
AnthropicResponseContentBlockToolUse,
|
||||
AnthropicResponseUsageBlock,
|
||||
ContentBlockDelta,
|
||||
ContentJsonBlockDelta,
|
||||
ContentTextBlockDelta,
|
||||
MessageBlockDelta,
|
||||
MessageDelta,
|
||||
UsageDelta,
|
||||
)
|
||||
from litellm.types.llms.openai import (
|
||||
AllMessageValues,
|
||||
ChatCompletionAssistantMessage,
|
||||
ChatCompletionAssistantToolCall,
|
||||
ChatCompletionImageObject,
|
||||
ChatCompletionImageUrlObject,
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionSystemMessage,
|
||||
ChatCompletionTextObject,
|
||||
ChatCompletionToolCallFunctionChunk,
|
||||
ChatCompletionToolChoiceFunctionParam,
|
||||
ChatCompletionToolChoiceObjectParam,
|
||||
ChatCompletionToolChoiceValues,
|
||||
ChatCompletionToolMessage,
|
||||
ChatCompletionToolParam,
|
||||
ChatCompletionToolParamFunctionChunk,
|
||||
ChatCompletionUserMessage,
|
||||
)
|
||||
from litellm.types.utils import Choices, ModelResponse, Usage
|
||||
|
||||
|
||||
class AnthropicExperimentalPassThroughConfig:
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
### FOR [BETA] `/v1/messages` endpoint support
|
||||
|
||||
def translatable_anthropic_params(self) -> List:
|
||||
"""
|
||||
Which anthropic params, we need to translate to the openai format.
|
||||
"""
|
||||
return ["messages", "metadata", "system", "tool_choice", "tools"]
|
||||
|
||||
def translate_anthropic_messages_to_openai( # noqa: PLR0915
|
||||
self,
|
||||
messages: List[
|
||||
Union[
|
||||
AnthropicMessagesUserMessageParam,
|
||||
AnthopicMessagesAssistantMessageParam,
|
||||
]
|
||||
],
|
||||
) -> List:
|
||||
new_messages: List[AllMessageValues] = []
|
||||
for m in messages:
|
||||
user_message: Optional[ChatCompletionUserMessage] = None
|
||||
tool_message_list: List[ChatCompletionToolMessage] = []
|
||||
new_user_content_list: List[
|
||||
Union[ChatCompletionTextObject, ChatCompletionImageObject]
|
||||
] = []
|
||||
## USER MESSAGE ##
|
||||
if m["role"] == "user":
|
||||
## translate user message
|
||||
message_content = m.get("content")
|
||||
if message_content and isinstance(message_content, str):
|
||||
user_message = ChatCompletionUserMessage(
|
||||
role="user", content=message_content
|
||||
)
|
||||
elif message_content and isinstance(message_content, list):
|
||||
for content in message_content:
|
||||
if content["type"] == "text":
|
||||
text_obj = ChatCompletionTextObject(
|
||||
type="text", text=content["text"]
|
||||
)
|
||||
new_user_content_list.append(text_obj)
|
||||
elif content["type"] == "image":
|
||||
image_url = ChatCompletionImageUrlObject(
|
||||
url=f"data:{content['type']};base64,{content['source']}"
|
||||
)
|
||||
image_obj = ChatCompletionImageObject(
|
||||
type="image_url", image_url=image_url
|
||||
)
|
||||
|
||||
new_user_content_list.append(image_obj)
|
||||
elif content["type"] == "tool_result":
|
||||
if "content" not in content:
|
||||
tool_result = ChatCompletionToolMessage(
|
||||
role="tool",
|
||||
tool_call_id=content["tool_use_id"],
|
||||
content="",
|
||||
)
|
||||
tool_message_list.append(tool_result)
|
||||
elif isinstance(content["content"], str):
|
||||
tool_result = ChatCompletionToolMessage(
|
||||
role="tool",
|
||||
tool_call_id=content["tool_use_id"],
|
||||
content=content["content"],
|
||||
)
|
||||
tool_message_list.append(tool_result)
|
||||
elif isinstance(content["content"], list):
|
||||
for c in content["content"]:
|
||||
if c["type"] == "text":
|
||||
tool_result = ChatCompletionToolMessage(
|
||||
role="tool",
|
||||
tool_call_id=content["tool_use_id"],
|
||||
content=c["text"],
|
||||
)
|
||||
tool_message_list.append(tool_result)
|
||||
elif c["type"] == "image":
|
||||
image_str = (
|
||||
f"data:{c['type']};base64,{c['source']}"
|
||||
)
|
||||
tool_result = ChatCompletionToolMessage(
|
||||
role="tool",
|
||||
tool_call_id=content["tool_use_id"],
|
||||
content=image_str,
|
||||
)
|
||||
tool_message_list.append(tool_result)
|
||||
|
||||
if user_message is not None:
|
||||
new_messages.append(user_message)
|
||||
|
||||
if len(new_user_content_list) > 0:
|
||||
new_messages.append({"role": "user", "content": new_user_content_list}) # type: ignore
|
||||
|
||||
if len(tool_message_list) > 0:
|
||||
new_messages.extend(tool_message_list)
|
||||
|
||||
## ASSISTANT MESSAGE ##
|
||||
assistant_message_str: Optional[str] = None
|
||||
tool_calls: List[ChatCompletionAssistantToolCall] = []
|
||||
if m["role"] == "assistant":
|
||||
if isinstance(m["content"], str):
|
||||
assistant_message_str = m["content"]
|
||||
elif isinstance(m["content"], list):
|
||||
for content in m["content"]:
|
||||
if content["type"] == "text":
|
||||
if assistant_message_str is None:
|
||||
assistant_message_str = content["text"]
|
||||
else:
|
||||
assistant_message_str += content["text"]
|
||||
elif content["type"] == "tool_use":
|
||||
function_chunk = ChatCompletionToolCallFunctionChunk(
|
||||
name=content["name"],
|
||||
arguments=json.dumps(content["input"]),
|
||||
)
|
||||
|
||||
tool_calls.append(
|
||||
ChatCompletionAssistantToolCall(
|
||||
id=content["id"],
|
||||
type="function",
|
||||
function=function_chunk,
|
||||
)
|
||||
)
|
||||
|
||||
if assistant_message_str is not None or len(tool_calls) > 0:
|
||||
assistant_message = ChatCompletionAssistantMessage(
|
||||
role="assistant",
|
||||
content=assistant_message_str,
|
||||
)
|
||||
if len(tool_calls) > 0:
|
||||
assistant_message["tool_calls"] = tool_calls
|
||||
new_messages.append(assistant_message)
|
||||
|
||||
return new_messages
|
||||
|
||||
def translate_anthropic_tool_choice_to_openai(
|
||||
self, tool_choice: AnthropicMessagesToolChoice
|
||||
) -> ChatCompletionToolChoiceValues:
|
||||
if tool_choice["type"] == "any":
|
||||
return "required"
|
||||
elif tool_choice["type"] == "auto":
|
||||
return "auto"
|
||||
elif tool_choice["type"] == "tool":
|
||||
tc_function_param = ChatCompletionToolChoiceFunctionParam(
|
||||
name=tool_choice.get("name", "")
|
||||
)
|
||||
return ChatCompletionToolChoiceObjectParam(
|
||||
type="function", function=tc_function_param
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"Incompatible tool choice param submitted - {}".format(tool_choice)
|
||||
)
|
||||
|
||||
def translate_anthropic_tools_to_openai(
|
||||
self, tools: List[AllAnthropicToolsValues]
|
||||
) -> List[ChatCompletionToolParam]:
|
||||
new_tools: List[ChatCompletionToolParam] = []
|
||||
mapped_tool_params = ["name", "input_schema", "description"]
|
||||
for tool in tools:
|
||||
function_chunk = ChatCompletionToolParamFunctionChunk(
|
||||
name=tool["name"],
|
||||
)
|
||||
if "input_schema" in tool:
|
||||
function_chunk["parameters"] = tool["input_schema"] # type: ignore
|
||||
if "description" in tool:
|
||||
function_chunk["description"] = tool["description"] # type: ignore
|
||||
|
||||
for k, v in tool.items():
|
||||
if k not in mapped_tool_params: # pass additional computer kwargs
|
||||
function_chunk.setdefault("parameters", {}).update({k: v})
|
||||
new_tools.append(
|
||||
ChatCompletionToolParam(type="function", function=function_chunk)
|
||||
)
|
||||
|
||||
return new_tools
|
||||
|
||||
def translate_anthropic_to_openai(
|
||||
self, anthropic_message_request: AnthropicMessagesRequest
|
||||
) -> ChatCompletionRequest:
|
||||
"""
|
||||
This is used by the beta Anthropic Adapter, for translating anthropic `/v1/messages` requests to the openai format.
|
||||
"""
|
||||
new_messages: List[AllMessageValues] = []
|
||||
|
||||
## CONVERT ANTHROPIC MESSAGES TO OPENAI
|
||||
new_messages = self.translate_anthropic_messages_to_openai(
|
||||
messages=anthropic_message_request["messages"]
|
||||
)
|
||||
## ADD SYSTEM MESSAGE TO MESSAGES
|
||||
if "system" in anthropic_message_request:
|
||||
new_messages.insert(
|
||||
0,
|
||||
ChatCompletionSystemMessage(
|
||||
role="system", content=anthropic_message_request["system"]
|
||||
),
|
||||
)
|
||||
|
||||
new_kwargs: ChatCompletionRequest = {
|
||||
"model": anthropic_message_request["model"],
|
||||
"messages": new_messages,
|
||||
}
|
||||
## CONVERT METADATA (user_id)
|
||||
if "metadata" in anthropic_message_request:
|
||||
if "user_id" in anthropic_message_request["metadata"]:
|
||||
new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"]
|
||||
|
||||
# Pass litellm proxy specific metadata
|
||||
if "litellm_metadata" in anthropic_message_request:
|
||||
# metadata will be passed to litellm.acompletion(), it's a litellm_param
|
||||
new_kwargs["metadata"] = anthropic_message_request.pop("litellm_metadata")
|
||||
|
||||
## CONVERT TOOL CHOICE
|
||||
if "tool_choice" in anthropic_message_request:
|
||||
new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai(
|
||||
tool_choice=anthropic_message_request["tool_choice"]
|
||||
)
|
||||
## CONVERT TOOLS
|
||||
if "tools" in anthropic_message_request:
|
||||
new_kwargs["tools"] = self.translate_anthropic_tools_to_openai(
|
||||
tools=anthropic_message_request["tools"]
|
||||
)
|
||||
|
||||
translatable_params = self.translatable_anthropic_params()
|
||||
for k, v in anthropic_message_request.items():
|
||||
if k not in translatable_params: # pass remaining params as is
|
||||
new_kwargs[k] = v # type: ignore
|
||||
|
||||
return new_kwargs
|
||||
|
||||
def _translate_openai_content_to_anthropic(
|
||||
self, choices: List[Choices]
|
||||
) -> List[
|
||||
Union[AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse]
|
||||
]:
|
||||
new_content: List[
|
||||
Union[
|
||||
AnthropicResponseContentBlockText, AnthropicResponseContentBlockToolUse
|
||||
]
|
||||
] = []
|
||||
for choice in choices:
|
||||
if (
|
||||
choice.message.tool_calls is not None
|
||||
and len(choice.message.tool_calls) > 0
|
||||
):
|
||||
for tool_call in choice.message.tool_calls:
|
||||
new_content.append(
|
||||
AnthropicResponseContentBlockToolUse(
|
||||
type="tool_use",
|
||||
id=tool_call.id,
|
||||
name=tool_call.function.name or "",
|
||||
input=json.loads(tool_call.function.arguments),
|
||||
)
|
||||
)
|
||||
elif choice.message.content is not None:
|
||||
new_content.append(
|
||||
AnthropicResponseContentBlockText(
|
||||
type="text", text=choice.message.content
|
||||
)
|
||||
)
|
||||
|
||||
return new_content
|
||||
|
||||
def _translate_openai_finish_reason_to_anthropic(
|
||||
self, openai_finish_reason: str
|
||||
) -> AnthropicFinishReason:
|
||||
if openai_finish_reason == "stop":
|
||||
return "end_turn"
|
||||
elif openai_finish_reason == "length":
|
||||
return "max_tokens"
|
||||
elif openai_finish_reason == "tool_calls":
|
||||
return "tool_use"
|
||||
return "end_turn"
|
||||
|
||||
def translate_openai_response_to_anthropic(
|
||||
self, response: ModelResponse
|
||||
) -> AnthropicResponse:
|
||||
## translate content block
|
||||
anthropic_content = self._translate_openai_content_to_anthropic(choices=response.choices) # type: ignore
|
||||
## extract finish reason
|
||||
anthropic_finish_reason = self._translate_openai_finish_reason_to_anthropic(
|
||||
openai_finish_reason=response.choices[0].finish_reason # type: ignore
|
||||
)
|
||||
# extract usage
|
||||
usage: Usage = getattr(response, "usage")
|
||||
anthropic_usage = AnthropicResponseUsageBlock(
|
||||
input_tokens=usage.prompt_tokens or 0,
|
||||
output_tokens=usage.completion_tokens or 0,
|
||||
)
|
||||
translated_obj = AnthropicResponse(
|
||||
id=response.id,
|
||||
type="message",
|
||||
role="assistant",
|
||||
model=response.model or "unknown-model",
|
||||
stop_sequence=None,
|
||||
usage=anthropic_usage,
|
||||
content=anthropic_content,
|
||||
stop_reason=anthropic_finish_reason,
|
||||
)
|
||||
|
||||
return translated_obj
|
||||
|
||||
def _translate_streaming_openai_chunk_to_anthropic(
|
||||
self, choices: List[OpenAIStreamingChoice]
|
||||
) -> Tuple[
|
||||
Literal["text_delta", "input_json_delta"],
|
||||
Union[ContentTextBlockDelta, ContentJsonBlockDelta],
|
||||
]:
|
||||
text: str = ""
|
||||
partial_json: Optional[str] = None
|
||||
for choice in choices:
|
||||
if choice.delta.content is not None:
|
||||
text += choice.delta.content
|
||||
elif choice.delta.tool_calls is not None:
|
||||
partial_json = ""
|
||||
for tool in choice.delta.tool_calls:
|
||||
if (
|
||||
tool.function is not None
|
||||
and tool.function.arguments is not None
|
||||
):
|
||||
partial_json += tool.function.arguments
|
||||
|
||||
if partial_json is not None:
|
||||
return "input_json_delta", ContentJsonBlockDelta(
|
||||
type="input_json_delta", partial_json=partial_json
|
||||
)
|
||||
else:
|
||||
return "text_delta", ContentTextBlockDelta(type="text_delta", text=text)
|
||||
|
||||
def translate_streaming_openai_response_to_anthropic(
|
||||
self, response: ModelResponse
|
||||
) -> Union[ContentBlockDelta, MessageBlockDelta]:
|
||||
## base case - final chunk w/ finish reason
|
||||
if response.choices[0].finish_reason is not None:
|
||||
delta = MessageDelta(
|
||||
stop_reason=self._translate_openai_finish_reason_to_anthropic(
|
||||
response.choices[0].finish_reason
|
||||
),
|
||||
)
|
||||
if getattr(response, "usage", None) is not None:
|
||||
litellm_usage_chunk: Optional[Usage] = response.usage # type: ignore
|
||||
elif (
|
||||
hasattr(response, "_hidden_params")
|
||||
and "usage" in response._hidden_params
|
||||
):
|
||||
litellm_usage_chunk = response._hidden_params["usage"]
|
||||
else:
|
||||
litellm_usage_chunk = None
|
||||
if litellm_usage_chunk is not None:
|
||||
usage_delta = UsageDelta(
|
||||
input_tokens=litellm_usage_chunk.prompt_tokens or 0,
|
||||
output_tokens=litellm_usage_chunk.completion_tokens or 0,
|
||||
)
|
||||
else:
|
||||
usage_delta = UsageDelta(input_tokens=0, output_tokens=0)
|
||||
return MessageBlockDelta(
|
||||
type="message_delta", delta=delta, usage=usage_delta
|
||||
)
|
||||
(
|
||||
type_of_content,
|
||||
content_block_delta,
|
||||
) = self._translate_streaming_openai_chunk_to_anthropic(
|
||||
choices=response.choices # type: ignore
|
||||
)
|
||||
return ContentBlockDelta(
|
||||
type="content_block_delta",
|
||||
index=response.choices[0].index,
|
||||
delta=content_block_delta,
|
||||
)
|
|
@@ -2,7 +2,7 @@
Azure Batches API Handler
"""

from typing import Any, Coroutine, Optional, Union
from typing import Any, Coroutine, Optional, Union, cast

import httpx

@@ -14,6 +14,7 @@ from litellm.types.llms.openai import (
    CreateBatchRequest,
    RetrieveBatchRequest,
)
from litellm.types.utils import LiteLLMBatch


class AzureBatchesAPI:

@@ -64,9 +65,9 @@ class AzureBatchesAPI:
        self,
        create_batch_data: CreateBatchRequest,
        azure_client: AsyncAzureOpenAI,
    ) -> Batch:
    ) -> LiteLLMBatch:
        response = await azure_client.batches.create(**create_batch_data)
        return response
        return LiteLLMBatch(**response.model_dump())

    def create_batch(
        self,

@@ -78,7 +79,7 @@ class AzureBatchesAPI:
        timeout: Union[float, httpx.Timeout],
        max_retries: Optional[int],
        client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = None,
    ) -> Union[Batch, Coroutine[Any, Any, Batch]]:
    ) -> Union[LiteLLMBatch, Coroutine[Any, Any, LiteLLMBatch]]:
        azure_client: Optional[Union[AzureOpenAI, AsyncAzureOpenAI]] = (
            self.get_azure_openai_client(
                api_key=api_key,

@@ -103,16 +104,16 @@ class AzureBatchesAPI:
            return self.acreate_batch(  # type: ignore
                create_batch_data=create_batch_data, azure_client=azure_client
            )
        response = azure_client.batches.create(**create_batch_data)
        return response
        response = cast(AzureOpenAI, azure_client).batches.create(**create_batch_data)
        return LiteLLMBatch(**response.model_dump())

    async def aretrieve_batch(
        self,
        retrieve_batch_data: RetrieveBatchRequest,
        client: AsyncAzureOpenAI,
    ) -> Batch:
    ) -> LiteLLMBatch:
        response = await client.batches.retrieve(**retrieve_batch_data)
        return response
        return LiteLLMBatch(**response.model_dump())

    def retrieve_batch(
        self,

@@ -149,8 +150,10 @@ class AzureBatchesAPI:
            return self.aretrieve_batch(  # type: ignore
                retrieve_batch_data=retrieve_batch_data, client=azure_client
            )
        response = azure_client.batches.retrieve(**retrieve_batch_data)
        return response
        response = cast(AzureOpenAI, azure_client).batches.retrieve(
            **retrieve_batch_data
        )
        return LiteLLMBatch(**response.model_dump())

    async def acancel_batch(
        self,
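The batch handlers above now normalize provider responses into `LiteLLMBatch` instead of returning the raw OpenAI `Batch`. A rough sketch of what that wrapper accepts (field names follow the OpenAI Batch schema; the values are illustrative, not real API output):

```python
from litellm.types.utils import LiteLLMBatch

# Illustrative payload; in practice this comes from the Azure/OpenAI batches API via model_dump()
raw = {
    "id": "batch_123",
    "object": "batch",
    "endpoint": "/v1/chat/completions",
    "input_file_id": "file_abc",
    "completion_window": "24h",
    "created_at": 1700000000,
    "status": "validating",
}
batch = LiteLLMBatch(**raw)
print(batch.id, batch.status)
```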
35
litellm/llms/base_llm/anthropic_messages/transformation.py
Normal file

@@ -0,0 +1,35 @@
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Optional

if TYPE_CHECKING:
    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj

    LiteLLMLoggingObj = _LiteLLMLoggingObj
else:
    LiteLLMLoggingObj = Any


class BaseAnthropicMessagesConfig(ABC):
    @abstractmethod
    def validate_environment(
        self,
        headers: dict,
        model: str,
        api_key: Optional[str] = None,
    ) -> dict:
        pass

    @abstractmethod
    def get_complete_url(self, api_base: Optional[str], model: str) -> str:
        """
        OPTIONAL

        Get the complete url for the request

        Some providers need `model` in `api_base`
        """
        return api_base or ""

    @abstractmethod
    def get_supported_anthropic_messages_params(self, model: str) -> list:
        pass
@ -554,6 +554,7 @@ class BaseAWSLLM:
|
|||
aws_access_key_id = optional_params.pop("aws_access_key_id", None)
|
||||
aws_session_token = optional_params.pop("aws_session_token", None)
|
||||
aws_region_name = self._get_aws_region_name(optional_params, model)
|
||||
optional_params.pop("aws_region_name", None)
|
||||
aws_role_name = optional_params.pop("aws_role_name", None)
|
||||
aws_session_name = optional_params.pop("aws_session_name", None)
|
||||
aws_profile_name = optional_params.pop("aws_profile_name", None)
|
||||
|
|
|
@ -272,7 +272,7 @@ class AmazonConverseConfig(BaseConfig):
|
|||
optional_params["temperature"] = value
|
||||
if param == "top_p":
|
||||
optional_params["topP"] = value
|
||||
if param == "tools":
|
||||
if param == "tools" and isinstance(value, list):
|
||||
optional_params = self._add_tools_to_optional_params(
|
||||
optional_params=optional_params, tools=value
|
||||
)
|
||||
|
@ -598,7 +598,7 @@ class AmazonConverseConfig(BaseConfig):
|
|||
if _text is not None:
|
||||
_thinking_block["thinking"] = _text
|
||||
if _signature is not None:
|
||||
_thinking_block["signature_delta"] = _signature
|
||||
_thinking_block["signature"] = _signature
|
||||
thinking_blocks_list.append(_thinking_block)
|
||||
return thinking_blocks_list
|
||||
|
||||
|
|
|
@ -1260,6 +1260,9 @@ class AWSEventStreamDecoder:
|
|||
_thinking_block = ChatCompletionThinkingBlock(type="thinking")
|
||||
if "text" in thinking_block:
|
||||
_thinking_block["thinking"] = thinking_block["text"]
|
||||
elif "signature" in thinking_block:
|
||||
_thinking_block["signature"] = thinking_block["signature"]
|
||||
_thinking_block["thinking"] = "" # consistent with anthropic response
|
||||
thinking_blocks_list.append(_thinking_block)
|
||||
return thinking_blocks_list
|
||||
|
||||
|
@ -1322,6 +1325,12 @@ class AWSEventStreamDecoder:
|
|||
thinking_blocks = self.translate_thinking_blocks(
|
||||
delta_obj["reasoningContent"]
|
||||
)
|
||||
if (
|
||||
thinking_blocks
|
||||
and len(thinking_blocks) > 0
|
||||
and reasoning_content is None
|
||||
):
|
||||
reasoning_content = "" # set to non-empty string to ensure consistency with Anthropic
|
||||
elif (
|
||||
"contentBlockIndex" in chunk_data
|
||||
): # stop block, no 'start' or 'delta' object
|
||||
|
|
|
@ -10,6 +10,8 @@ import litellm
|
|||
from litellm._logging import verbose_logger
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LitellmLogging
|
||||
from litellm.llms.custom_httpx.http_handler import (
|
||||
AsyncHTTPHandler,
|
||||
HTTPHandler,
|
||||
_get_httpx_client,
|
||||
get_async_httpx_client,
|
||||
)
|
||||
|
@ -51,6 +53,7 @@ class BedrockImageGeneration(BaseAWSLLM):
|
|||
aimg_generation: bool = False,
|
||||
api_base: Optional[str] = None,
|
||||
extra_headers: Optional[dict] = None,
|
||||
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
|
||||
):
|
||||
prepared_request = self._prepare_request(
|
||||
model=model,
|
||||
|
@ -69,8 +72,14 @@ class BedrockImageGeneration(BaseAWSLLM):
|
|||
logging_obj=logging_obj,
|
||||
prompt=prompt,
|
||||
model_response=model_response,
|
||||
client=(
|
||||
client
|
||||
if client is not None and isinstance(client, AsyncHTTPHandler)
|
||||
else None
|
||||
),
|
||||
)
|
||||
|
||||
if client is None or not isinstance(client, HTTPHandler):
|
||||
client = _get_httpx_client()
|
||||
try:
|
||||
response = client.post(url=prepared_request.endpoint_url, headers=prepared_request.prepped.headers, data=prepared_request.body) # type: ignore
|
||||
|
@ -99,13 +108,14 @@ class BedrockImageGeneration(BaseAWSLLM):
|
|||
logging_obj: LitellmLogging,
|
||||
prompt: str,
|
||||
model_response: ImageResponse,
|
||||
client: Optional[AsyncHTTPHandler] = None,
|
||||
) -> ImageResponse:
|
||||
"""
|
||||
Asynchronous handler for bedrock image generation
|
||||
|
||||
Awaits the response from the bedrock image generation endpoint
|
||||
"""
|
||||
async_client = get_async_httpx_client(
|
||||
async_client = client or get_async_httpx_client(
|
||||
llm_provider=litellm.LlmProviders.BEDROCK,
|
||||
params={"timeout": timeout},
|
||||
)
|
||||
|
|
|
@@ -84,7 +84,9 @@ class CodestralTextCompletionConfig(OpenAITextCompletionConfig):
        finish_reason = None
        logprobs = None

        chunk_data = chunk_data.replace("data:", "")
        chunk_data = (
            litellm.CustomStreamWrapper._strip_sse_data_from_chunk(chunk_data) or ""
        )
        chunk_data = chunk_data.strip()
        if len(chunk_data) == 0 or chunk_data == "[DONE]":
            return {

@@ -873,7 +873,7 @@ class BaseLLMHTTPHandler:
        elif isinstance(audio_file, bytes):
            # Assume it's already binary data
            binary_data = audio_file
        elif isinstance(audio_file, io.BufferedReader):
        elif isinstance(audio_file, io.BufferedReader) or isinstance(audio_file, io.BytesIO):
            # Handle file-like objects
            binary_data = audio_file.read()
||||
|
|
|
@ -89,7 +89,7 @@ class ModelResponseIterator:
|
|||
raise RuntimeError(f"Error receiving chunk from stream: {e}")
|
||||
|
||||
try:
|
||||
chunk = chunk.replace("data:", "")
|
||||
chunk = litellm.CustomStreamWrapper._strip_sse_data_from_chunk(chunk) or ""
|
||||
chunk = chunk.strip()
|
||||
if len(chunk) > 0:
|
||||
json_chunk = json.loads(chunk)
|
||||
|
@ -134,7 +134,7 @@ class ModelResponseIterator:
|
|||
raise RuntimeError(f"Error receiving chunk from stream: {e}")
|
||||
|
||||
try:
|
||||
chunk = chunk.replace("data:", "")
|
||||
chunk = litellm.CustomStreamWrapper._strip_sse_data_from_chunk(chunk) or ""
|
||||
chunk = chunk.strip()
|
||||
if chunk == "[DONE]":
|
||||
raise StopAsyncIteration
|
||||
|
|
|
@ -114,12 +114,16 @@ class GoogleAIStudioGeminiConfig(VertexGeminiConfig):
|
|||
if element.get("type") == "image_url":
|
||||
img_element = element
|
||||
_image_url: Optional[str] = None
|
||||
format: Optional[str] = None
|
||||
if isinstance(img_element.get("image_url"), dict):
|
||||
_image_url = img_element["image_url"].get("url") # type: ignore
|
||||
format = img_element["image_url"].get("format") # type: ignore
|
||||
else:
|
||||
_image_url = img_element.get("image_url") # type: ignore
|
||||
if _image_url and "https://" in _image_url:
|
||||
image_obj = convert_to_anthropic_image_obj(_image_url)
|
||||
image_obj = convert_to_anthropic_image_obj(
|
||||
_image_url, format=format
|
||||
)
|
||||
img_element["image_url"] = ( # type: ignore
|
||||
convert_generic_image_chunk_to_openai_image_obj(
|
||||
image_obj
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import json
|
||||
import time
|
||||
import uuid
|
||||
from typing import Any, List, Optional
|
||||
from typing import Any, List, Optional, Union
|
||||
|
||||
import aiohttp
|
||||
import httpx
|
||||
|
@ -9,7 +9,11 @@ from pydantic import BaseModel
|
|||
|
||||
import litellm
|
||||
from litellm import verbose_logger
|
||||
from litellm.llms.custom_httpx.http_handler import get_async_httpx_client
|
||||
from litellm.llms.custom_httpx.http_handler import (
|
||||
AsyncHTTPHandler,
|
||||
HTTPHandler,
|
||||
get_async_httpx_client,
|
||||
)
|
||||
from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
|
||||
from litellm.types.llms.ollama import OllamaToolCall, OllamaToolCallFunction
|
||||
from litellm.types.llms.openai import ChatCompletionAssistantToolCall
|
||||
|
@ -205,6 +209,7 @@ def get_ollama_response( # noqa: PLR0915
|
|||
api_key: Optional[str] = None,
|
||||
acompletion: bool = False,
|
||||
encoding=None,
|
||||
client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
|
||||
):
|
||||
if api_base.endswith("/api/chat"):
|
||||
url = api_base
|
||||
|
@ -301,7 +306,11 @@ def get_ollama_response( # noqa: PLR0915
|
|||
headers: Optional[dict] = None
|
||||
if api_key is not None:
|
||||
headers = {"Authorization": "Bearer {}".format(api_key)}
|
||||
response = litellm.module_level_client.post(
|
||||
|
||||
sync_client = litellm.module_level_client
|
||||
if client is not None and isinstance(client, HTTPHandler):
|
||||
sync_client = client
|
||||
response = sync_client.post(
|
||||
url=url,
|
||||
json=data,
|
||||
headers=headers,
|
||||
|
|
|
@ -20,7 +20,11 @@ from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
|
|||
from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
|
||||
from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
|
||||
from litellm.secret_managers.main import get_secret_str
|
||||
from litellm.types.llms.openai import AllMessageValues, ChatCompletionImageObject
|
||||
from litellm.types.llms.openai import (
|
||||
AllMessageValues,
|
||||
ChatCompletionImageObject,
|
||||
ChatCompletionImageUrlObject,
|
||||
)
|
||||
from litellm.types.utils import ModelResponse, ModelResponseStream
|
||||
from litellm.utils import convert_to_model_response_object
|
||||
|
||||
|
@ -189,6 +193,16 @@ class OpenAIGPTConfig(BaseLLMModelInfo, BaseConfig):
|
|||
content_item["image_url"] = {
|
||||
"url": content_item["image_url"],
|
||||
}
|
||||
elif isinstance(content_item["image_url"], dict):
|
||||
litellm_specific_params = {"format"}
|
||||
new_image_url_obj = ChatCompletionImageUrlObject(
|
||||
**{ # type: ignore
|
||||
k: v
|
||||
for k, v in content_item["image_url"].items()
|
||||
if k not in litellm_specific_params
|
||||
}
|
||||
)
|
||||
content_item["image_url"] = new_image_url_obj
|
||||
return messages
|
||||
|
||||
def transform_request(
|
||||
|
|
|
@ -37,6 +37,7 @@ from litellm.llms.custom_httpx.http_handler import _DEFAULT_TTL_FOR_HTTPX_CLIENT
|
|||
from litellm.types.utils import (
|
||||
EmbeddingResponse,
|
||||
ImageResponse,
|
||||
LiteLLMBatch,
|
||||
ModelResponse,
|
||||
ModelResponseStream,
|
||||
)
|
||||
|
@ -1755,9 +1756,9 @@ class OpenAIBatchesAPI(BaseLLM):
|
|||
self,
|
||||
create_batch_data: CreateBatchRequest,
|
||||
openai_client: AsyncOpenAI,
|
||||
) -> Batch:
|
||||
) -> LiteLLMBatch:
|
||||
response = await openai_client.batches.create(**create_batch_data)
|
||||
return response
|
||||
return LiteLLMBatch(**response.model_dump())
|
||||
|
||||
def create_batch(
|
||||
self,
|
||||
|
@ -1769,7 +1770,7 @@ class OpenAIBatchesAPI(BaseLLM):
|
|||
max_retries: Optional[int],
|
||||
organization: Optional[str],
|
||||
client: Optional[Union[OpenAI, AsyncOpenAI]] = None,
|
||||
) -> Union[Batch, Coroutine[Any, Any, Batch]]:
|
||||
) -> Union[LiteLLMBatch, Coroutine[Any, Any, LiteLLMBatch]]:
|
||||
openai_client: Optional[Union[OpenAI, AsyncOpenAI]] = self.get_openai_client(
|
||||
api_key=api_key,
|
||||
api_base=api_base,
|
||||
|
@ -1792,17 +1793,18 @@ class OpenAIBatchesAPI(BaseLLM):
|
|||
return self.acreate_batch( # type: ignore
|
||||
create_batch_data=create_batch_data, openai_client=openai_client
|
||||
)
|
||||
response = openai_client.batches.create(**create_batch_data)
|
||||
return response
|
||||
response = cast(OpenAI, openai_client).batches.create(**create_batch_data)
|
||||
|
||||
return LiteLLMBatch(**response.model_dump())
|
||||
|
||||
async def aretrieve_batch(
|
||||
self,
|
||||
retrieve_batch_data: RetrieveBatchRequest,
|
||||
openai_client: AsyncOpenAI,
|
||||
) -> Batch:
|
||||
) -> LiteLLMBatch:
|
||||
verbose_logger.debug("retrieving batch, args= %s", retrieve_batch_data)
|
||||
response = await openai_client.batches.retrieve(**retrieve_batch_data)
|
||||
return response
|
||||
return LiteLLMBatch(**response.model_dump())
|
||||
|
||||
def retrieve_batch(
|
||||
self,
|
||||
|
@ -1837,8 +1839,8 @@ class OpenAIBatchesAPI(BaseLLM):
|
|||
return self.aretrieve_batch( # type: ignore
|
||||
retrieve_batch_data=retrieve_batch_data, openai_client=openai_client
|
||||
)
|
||||
response = openai_client.batches.retrieve(**retrieve_batch_data)
|
||||
return response
|
||||
response = cast(OpenAI, openai_client).batches.retrieve(**retrieve_batch_data)
|
||||
return LiteLLMBatch(**response.model_dump())
|
||||
|
||||
async def acancel_batch(
|
||||
self,
|
||||
|
|
|
@ -6,7 +6,16 @@ Calls done in OpenAI/openai.py as OpenRouter is openai-compatible.
|
|||
Docs: https://openrouter.ai/docs/parameters
|
||||
"""
|
||||
|
||||
from typing import Any, AsyncIterator, Iterator, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
||||
from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
|
||||
from litellm.llms.base_llm.chat.transformation import BaseLLMException
|
||||
from litellm.types.utils import ModelResponse, ModelResponseStream
|
||||
|
||||
from ...openai.chat.gpt_transformation import OpenAIGPTConfig
|
||||
from ..common_utils import OpenRouterException
|
||||
|
||||
|
||||
class OpenrouterConfig(OpenAIGPTConfig):
|
||||
|
@ -37,3 +46,43 @@ class OpenrouterConfig(OpenAIGPTConfig):
|
|||
extra_body # openai client supports `extra_body` param
|
||||
)
|
||||
return mapped_openai_params
|
||||
|
||||
def get_error_class(
|
||||
self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
|
||||
) -> BaseLLMException:
|
||||
return OpenRouterException(
|
||||
message=error_message,
|
||||
status_code=status_code,
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
def get_model_response_iterator(
|
||||
self,
|
||||
streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
|
||||
sync_stream: bool,
|
||||
json_mode: Optional[bool] = False,
|
||||
) -> Any:
|
||||
return OpenRouterChatCompletionStreamingHandler(
|
||||
streaming_response=streaming_response,
|
||||
sync_stream=sync_stream,
|
||||
json_mode=json_mode,
|
||||
)
|
||||
|
||||
|
||||
class OpenRouterChatCompletionStreamingHandler(BaseModelResponseIterator):
|
||||
|
||||
def chunk_parser(self, chunk: dict) -> ModelResponseStream:
|
||||
try:
|
||||
new_choices = []
|
||||
for choice in chunk["choices"]:
|
||||
choice["delta"]["reasoning_content"] = choice["delta"].get("reasoning")
|
||||
new_choices.append(choice)
|
||||
return ModelResponseStream(
|
||||
id=chunk["id"],
|
||||
object="chat.completion.chunk",
|
||||
created=chunk["created"],
|
||||
model=chunk["model"],
|
||||
choices=new_choices,
|
||||
)
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
|
5
litellm/llms/openrouter/common_utils.py
Normal file
|
@ -0,0 +1,5 @@
|
|||
from litellm.llms.base_llm.chat.transformation import BaseLLMException
|
||||
|
||||
|
||||
class OpenRouterException(BaseLLMException):
|
||||
pass
|
|
@ -3,6 +3,7 @@ from typing import AsyncIterator, Iterator, List, Optional, Union
|
|||
|
||||
import httpx
|
||||
|
||||
import litellm
|
||||
from litellm import verbose_logger
|
||||
from litellm.llms.base_llm.chat.transformation import BaseLLMException
|
||||
from litellm.types.utils import GenericStreamingChunk as GChunk
|
||||
|
@ -78,7 +79,11 @@ class AWSEventStreamDecoder:
|
|||
message = self._parse_message_from_event(event)
|
||||
if message:
|
||||
# remove data: prefix and "\n\n" at the end
|
||||
message = message.replace("data:", "").replace("\n\n", "")
|
||||
message = (
|
||||
litellm.CustomStreamWrapper._strip_sse_data_from_chunk(message)
|
||||
or ""
|
||||
)
|
||||
message = message.replace("\n\n", "")
|
||||
|
||||
# Accumulate JSON data
|
||||
accumulated_json += message
|
||||
|
@ -127,7 +132,11 @@ class AWSEventStreamDecoder:
|
|||
if message:
|
||||
verbose_logger.debug("sagemaker parsed chunk bytes %s", message)
|
||||
# remove data: prefix and "\n\n" at the end
|
||||
message = message.replace("data:", "").replace("\n\n", "")
|
||||
message = (
|
||||
litellm.CustomStreamWrapper._strip_sse_data_from_chunk(message)
|
||||
or ""
|
||||
)
|
||||
message = message.replace("\n\n", "")
|
||||
|
||||
# Accumulate JSON data
|
||||
accumulated_json += message
|
||||
|
|
|
@ -9,11 +9,12 @@ from litellm.llms.custom_httpx.http_handler import (
|
|||
get_async_httpx_client,
|
||||
)
|
||||
from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM
|
||||
from litellm.types.llms.openai import Batch, CreateBatchRequest
|
||||
from litellm.types.llms.openai import CreateBatchRequest
|
||||
from litellm.types.llms.vertex_ai import (
|
||||
VERTEX_CREDENTIALS_TYPES,
|
||||
VertexAIBatchPredictionJob,
|
||||
)
|
||||
from litellm.types.utils import LiteLLMBatch
|
||||
|
||||
from .transformation import VertexAIBatchTransformation
|
||||
|
||||
|
@ -33,7 +34,7 @@ class VertexAIBatchPrediction(VertexLLM):
|
|||
vertex_location: Optional[str],
|
||||
timeout: Union[float, httpx.Timeout],
|
||||
max_retries: Optional[int],
|
||||
) -> Union[Batch, Coroutine[Any, Any, Batch]]:
|
||||
) -> Union[LiteLLMBatch, Coroutine[Any, Any, LiteLLMBatch]]:
|
||||
|
||||
sync_handler = _get_httpx_client()
|
||||
|
||||
|
@ -101,7 +102,7 @@ class VertexAIBatchPrediction(VertexLLM):
|
|||
vertex_batch_request: VertexAIBatchPredictionJob,
|
||||
api_base: str,
|
||||
headers: Dict[str, str],
|
||||
) -> Batch:
|
||||
) -> LiteLLMBatch:
|
||||
client = get_async_httpx_client(
|
||||
llm_provider=litellm.LlmProviders.VERTEX_AI,
|
||||
)
|
||||
|
@ -138,7 +139,7 @@ class VertexAIBatchPrediction(VertexLLM):
|
|||
vertex_location: Optional[str],
|
||||
timeout: Union[float, httpx.Timeout],
|
||||
max_retries: Optional[int],
|
||||
) -> Union[Batch, Coroutine[Any, Any, Batch]]:
|
||||
) -> Union[LiteLLMBatch, Coroutine[Any, Any, LiteLLMBatch]]:
|
||||
sync_handler = _get_httpx_client()
|
||||
|
||||
access_token, project_id = self._ensure_access_token(
|
||||
|
@ -199,7 +200,7 @@ class VertexAIBatchPrediction(VertexLLM):
|
|||
self,
|
||||
api_base: str,
|
||||
headers: Dict[str, str],
|
||||
) -> Batch:
|
||||
) -> LiteLLMBatch:
|
||||
client = get_async_httpx_client(
|
||||
llm_provider=litellm.LlmProviders.VERTEX_AI,
|
||||
)
|
||||
|
|
|
@ -4,8 +4,9 @@ from typing import Dict
|
|||
from litellm.llms.vertex_ai.common_utils import (
|
||||
_convert_vertex_datetime_to_openai_datetime,
|
||||
)
|
||||
from litellm.types.llms.openai import Batch, BatchJobStatus, CreateBatchRequest
|
||||
from litellm.types.llms.openai import BatchJobStatus, CreateBatchRequest
|
||||
from litellm.types.llms.vertex_ai import *
|
||||
from litellm.types.utils import LiteLLMBatch
|
||||
|
||||
|
||||
class VertexAIBatchTransformation:
|
||||
|
@ -47,8 +48,8 @@ class VertexAIBatchTransformation:
|
|||
@classmethod
|
||||
def transform_vertex_ai_batch_response_to_openai_batch_response(
|
||||
cls, response: VertexBatchPredictionResponse
|
||||
) -> Batch:
|
||||
return Batch(
|
||||
) -> LiteLLMBatch:
|
||||
return LiteLLMBatch(
|
||||
id=cls._get_batch_id_from_vertex_ai_batch_response(response),
|
||||
completion_window="24hrs",
|
||||
created_at=_convert_vertex_datetime_to_openai_datetime(
|
||||
|
|
|
@ -170,6 +170,9 @@ def _build_vertex_schema(parameters: dict):
|
|||
strip_field(
|
||||
parameters, field_name="$schema"
|
||||
) # 5. Remove $schema - json schema value, not supported by OpenAPI - causes vertex errors.
|
||||
strip_field(
|
||||
parameters, field_name="$id"
|
||||
) # 6. Remove id - json schema value, not supported by OpenAPI - causes vertex errors.
|
||||
|
||||
return parameters
|
||||
|
||||
|
|
|
@ -55,10 +55,11 @@ else:
|
|||
LiteLLMLoggingObj = Any
|
||||
|
||||
|
||||
def _process_gemini_image(image_url: str) -> PartType:
|
||||
def _process_gemini_image(image_url: str, format: Optional[str] = None) -> PartType:
|
||||
"""
|
||||
Given an image URL, return the appropriate PartType for Gemini
|
||||
"""
|
||||
|
||||
try:
|
||||
# GCS URIs
|
||||
if "gs://" in image_url:
|
||||
|
@ -66,6 +67,7 @@ def _process_gemini_image(image_url: str) -> PartType:
|
|||
extension_with_dot = os.path.splitext(image_url)[-1] # Ex: ".png"
|
||||
extension = extension_with_dot[1:] # Ex: "png"
|
||||
|
||||
if not format:
|
||||
file_type = get_file_type_from_extension(extension)
|
||||
|
||||
# Validate the file type is supported by Gemini
|
||||
|
@ -73,18 +75,22 @@ def _process_gemini_image(image_url: str) -> PartType:
|
|||
raise Exception(f"File type not supported by gemini - {file_type}")
|
||||
|
||||
mime_type = get_file_mime_type_for_file_type(file_type)
|
||||
else:
|
||||
mime_type = format
|
||||
file_data = FileDataType(mime_type=mime_type, file_uri=image_url)
|
||||
|
||||
return PartType(file_data=file_data)
|
||||
elif (
|
||||
"https://" in image_url
|
||||
and (image_type := _get_image_mime_type_from_url(image_url)) is not None
|
||||
and (image_type := format or _get_image_mime_type_from_url(image_url))
|
||||
is not None
|
||||
):
|
||||
|
||||
file_data = FileDataType(file_uri=image_url, mime_type=image_type)
|
||||
return PartType(file_data=file_data)
|
||||
elif "http://" in image_url or "https://" in image_url or "base64" in image_url:
|
||||
# https links for unsupported mime types and base64 images
|
||||
image = convert_to_anthropic_image_obj(image_url)
|
||||
image = convert_to_anthropic_image_obj(image_url, format=format)
|
||||
_blob = BlobType(data=image["data"], mime_type=image["media_type"])
|
||||
return PartType(inline_data=_blob)
|
||||
raise Exception("Invalid image received - {}".format(image_url))
|
||||
|
@ -159,11 +165,15 @@ def _gemini_convert_messages_with_history( # noqa: PLR0915
|
|||
elif element["type"] == "image_url":
|
||||
element = cast(ChatCompletionImageObject, element)
|
||||
img_element = element
|
||||
format: Optional[str] = None
|
||||
if isinstance(img_element["image_url"], dict):
|
||||
image_url = img_element["image_url"]["url"]
|
||||
format = img_element["image_url"].get("format")
|
||||
else:
|
||||
image_url = img_element["image_url"]
|
||||
_part = _process_gemini_image(image_url=image_url)
|
||||
_part = _process_gemini_image(
|
||||
image_url=image_url, format=format
|
||||
)
|
||||
_parts.append(_part)
|
||||
user_content.extend(_parts)
|
||||
elif (
|
||||
|
|
|
@ -1023,7 +1023,6 @@ class VertexLLM(VertexBase):
|
|||
gemini_api_key: Optional[str] = None,
|
||||
extra_headers: Optional[dict] = None,
|
||||
) -> Union[ModelResponse, CustomStreamWrapper]:
|
||||
|
||||
should_use_v1beta1_features = self.is_using_v1beta1_features(
|
||||
optional_params=optional_params
|
||||
)
|
||||
|
@ -1409,7 +1408,8 @@ class ModelResponseIterator:
|
|||
return self.chunk_parser(chunk=json_chunk)
|
||||
|
||||
def handle_accumulated_json_chunk(self, chunk: str) -> GenericStreamingChunk:
|
||||
message = chunk.replace("data:", "").replace("\n\n", "")
|
||||
chunk = litellm.CustomStreamWrapper._strip_sse_data_from_chunk(chunk) or ""
|
||||
message = chunk.replace("\n\n", "")
|
||||
|
||||
# Accumulate JSON data
|
||||
self.accumulated_json += message
|
||||
|
@ -1432,7 +1432,7 @@ class ModelResponseIterator:
|
|||
|
||||
def _common_chunk_parsing_logic(self, chunk: str) -> GenericStreamingChunk:
|
||||
try:
|
||||
chunk = chunk.replace("data:", "")
|
||||
chunk = litellm.CustomStreamWrapper._strip_sse_data_from_chunk(chunk) or ""
|
||||
if len(chunk) > 0:
|
||||
"""
|
||||
Check if initial chunk valid json
|
||||
|
|
|
@ -1159,6 +1159,9 @@ def completion( # type: ignore # noqa: PLR0915
|
|||
prompt_id=prompt_id,
|
||||
prompt_variables=prompt_variables,
|
||||
ssl_verify=ssl_verify,
|
||||
merge_reasoning_content_in_choices=kwargs.get(
|
||||
"merge_reasoning_content_in_choices", None
|
||||
),
|
||||
)
|
||||
logging.update_environment_variables(
|
||||
model=model,
|
||||
|
@ -2271,23 +2274,22 @@ def completion( # type: ignore # noqa: PLR0915
|
|||
data = {"model": model, "messages": messages, **optional_params}
|
||||
|
||||
## COMPLETION CALL
|
||||
response = openai_like_chat_completion.completion(
|
||||
response = base_llm_http_handler.completion(
|
||||
model=model,
|
||||
stream=stream,
|
||||
messages=messages,
|
||||
headers=headers,
|
||||
api_key=api_key,
|
||||
acompletion=acompletion,
|
||||
api_base=api_base,
|
||||
model_response=model_response,
|
||||
print_verbose=print_verbose,
|
||||
optional_params=optional_params,
|
||||
litellm_params=litellm_params,
|
||||
logger_fn=logger_fn,
|
||||
logging_obj=logging,
|
||||
acompletion=acompletion,
|
||||
timeout=timeout, # type: ignore
|
||||
custom_llm_provider="openrouter",
|
||||
custom_prompt_dict=custom_prompt_dict,
|
||||
timeout=timeout,
|
||||
headers=headers,
|
||||
encoding=encoding,
|
||||
api_key=api_key,
|
||||
logging_obj=logging, # model call logging done inside the class as we make need to modify I/O to fit aleph alpha's requirements
|
||||
client=client,
|
||||
)
|
||||
## LOGGING
|
||||
logging.post_call(
|
||||
|
@ -2853,6 +2855,7 @@ def completion( # type: ignore # noqa: PLR0915
|
|||
acompletion=acompletion,
|
||||
model_response=model_response,
|
||||
encoding=encoding,
|
||||
client=client,
|
||||
)
|
||||
if acompletion is True or optional_params.get("stream", False) is True:
|
||||
return generator
|
||||
|
@ -4521,6 +4524,7 @@ def image_generation( # noqa: PLR0915
|
|||
non_default_params = {
|
||||
k: v for k, v in kwargs.items() if k not in default_params
|
||||
} # model-specific params - pass them straight to the model/provider
|
||||
|
||||
optional_params = get_optional_params_image_gen(
|
||||
model=model,
|
||||
n=n,
|
||||
|
@ -4532,6 +4536,7 @@ def image_generation( # noqa: PLR0915
|
|||
custom_llm_provider=custom_llm_provider,
|
||||
**non_default_params,
|
||||
)
|
||||
|
||||
logging: Logging = litellm_logging_obj
|
||||
logging.update_environment_variables(
|
||||
model=model,
|
||||
|
@ -4630,6 +4635,7 @@ def image_generation( # noqa: PLR0915
|
|||
optional_params=optional_params,
|
||||
model_response=model_response,
|
||||
aimg_generation=aimg_generation,
|
||||
client=client,
|
||||
)
|
||||
elif custom_llm_provider == "vertex_ai":
|
||||
vertex_ai_project = (
|
||||
|
|
|
@ -1068,9 +1068,9 @@
|
|||
"max_tokens": 65536,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 65536,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000012,
|
||||
"cache_read_input_token_cost": 0.0000015,
|
||||
"input_cost_per_token": 0.00000121,
|
||||
"output_cost_per_token": 0.00000484,
|
||||
"cache_read_input_token_cost": 0.000000605,
|
||||
"litellm_provider": "azure",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
|
@ -1082,9 +1082,9 @@
|
|||
"max_tokens": 65536,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 65536,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000012,
|
||||
"cache_read_input_token_cost": 0.0000015,
|
||||
"input_cost_per_token": 0.00000121,
|
||||
"output_cost_per_token": 0.00000484,
|
||||
"cache_read_input_token_cost": 0.000000605,
|
||||
"litellm_provider": "azure",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
|
@ -2795,6 +2795,7 @@
|
|||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 264,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_response_schema": true,
|
||||
"deprecation_date": "2025-10-01",
|
||||
|
@ -2814,6 +2815,7 @@
|
|||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 264,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_response_schema": true,
|
||||
"deprecation_date": "2025-10-01",
|
||||
|
@ -2888,6 +2890,7 @@
|
|||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 159,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_response_schema": true,
|
||||
"deprecation_date": "2025-06-01",
|
||||
|
@ -2907,15 +2910,16 @@
|
|||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 159,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_response_schema": true,
|
||||
"deprecation_date": "2025-06-01",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"claude-3-7-sonnet-latest": {
|
||||
"max_tokens": 8192,
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 8192,
|
||||
"max_output_tokens": 128000,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000015,
|
||||
"cache_creation_input_token_cost": 0.00000375,
|
||||
|
@ -2926,15 +2930,16 @@
|
|||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 159,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_response_schema": true,
|
||||
"deprecation_date": "2025-06-01",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"claude-3-7-sonnet-20250219": {
|
||||
"max_tokens": 8192,
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 200000,
|
||||
"max_output_tokens": 8192,
|
||||
"max_output_tokens": 128000,
|
||||
"input_cost_per_token": 0.000003,
|
||||
"output_cost_per_token": 0.000015,
|
||||
"cache_creation_input_token_cost": 0.00000375,
|
||||
|
@ -2945,6 +2950,7 @@
|
|||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 159,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_response_schema": true,
|
||||
"deprecation_date": "2026-02-01",
|
||||
|
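The Claude 3.7 Sonnet entries above raise `max_tokens` / `max_output_tokens` from 8192 to 128000 and add a `deprecation_date`. A small sketch of reading those limits back out of the registry, assuming the installed litellm version ships the updated JSON shown here:

```python
import litellm

# Reads the claude-3-7-sonnet limits from litellm's model registry; assumes the
# installed litellm version includes the updated JSON shown above.
info = litellm.get_model_info(model="claude-3-7-sonnet-20250219")
print(info["max_output_tokens"])        # expected 128000 after this change
print(info["supports_prompt_caching"])  # True per the entry above
```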
@ -4159,6 +4165,7 @@
|
|||
"litellm_provider": "vertex_ai-anthropic_models",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_vision": true,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_tool_choice": true
|
||||
|
@ -4172,6 +4179,7 @@
|
|||
"litellm_provider": "vertex_ai-anthropic_models",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_vision": true,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_tool_choice": true
|
||||
|
@ -4185,6 +4193,7 @@
|
|||
"litellm_provider": "vertex_ai-anthropic_models",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_vision": true,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_tool_choice": true
|
||||
|
@ -4198,6 +4207,7 @@
|
|||
"litellm_provider": "vertex_ai-anthropic_models",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_vision": true,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_tool_choice": true
|
||||
|
@ -4213,6 +4223,7 @@
|
|||
"litellm_provider": "vertex_ai-anthropic_models",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 159,
|
||||
"supports_assistant_prefill": true,
|
||||
|
@ -4256,6 +4267,7 @@
|
|||
"litellm_provider": "vertex_ai-anthropic_models",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -4268,6 +4280,7 @@
|
|||
"litellm_provider": "vertex_ai-anthropic_models",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
|
@ -6044,6 +6057,26 @@
|
|||
"mode": "chat",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"jamba-large-1.6": {
|
||||
"max_tokens": 256000,
|
||||
"max_input_tokens": 256000,
|
||||
"max_output_tokens": 256000,
|
||||
"input_cost_per_token": 0.000002,
|
||||
"output_cost_per_token": 0.000008,
|
||||
"litellm_provider": "ai21",
|
||||
"mode": "chat",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"jamba-mini-1.6": {
|
||||
"max_tokens": 256000,
|
||||
"max_input_tokens": 256000,
|
||||
"max_output_tokens": 256000,
|
||||
"input_cost_per_token": 0.0000002,
|
||||
"output_cost_per_token": 0.0000004,
|
||||
"litellm_provider": "ai21",
|
||||
"mode": "chat",
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"jamba-1.5-mini": {
|
||||
"max_tokens": 256000,
|
||||
"max_input_tokens": 256000,
|
||||
|
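The new `jamba-large-1.6` / `jamba-mini-1.6` entries register AI21's 1.6 models with a 256k context window and tool-choice support. A hypothetical call against one of them; `AI21_API_KEY` is assumed to be set and the prompt is illustrative:

```python
import litellm

# Hypothetical call against one of the newly added AI21 entries; requires
# AI21_API_KEY in the environment. The "ai21/" prefix matches litellm_provider above.
response = litellm.completion(
    model="ai21/jamba-mini-1.6",
    messages=[{"role": "user", "content": "Reply with one word: ready?"}],
)
print(response.choices[0].message.content)
```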
@ -6432,6 +6465,18 @@
|
|||
"supports_prompt_caching": true,
|
||||
"supports_response_schema": true
|
||||
},
|
||||
"eu.amazon.nova-micro-v1:0": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 300000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.000000046,
|
||||
"output_cost_per_token": 0.000000184,
|
||||
"litellm_provider": "bedrock_converse",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_response_schema": true
|
||||
},
|
||||
"amazon.nova-lite-v1:0": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 128000,
|
||||
|
@ -6460,6 +6505,20 @@
|
|||
"supports_prompt_caching": true,
|
||||
"supports_response_schema": true
|
||||
},
|
||||
"eu.amazon.nova-lite-v1:0": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.000000078,
|
||||
"output_cost_per_token": 0.000000312,
|
||||
"litellm_provider": "bedrock_converse",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_response_schema": true
|
||||
},
|
||||
"amazon.nova-pro-v1:0": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 300000,
|
||||
|
@ -6488,6 +6547,21 @@
|
|||
"supports_prompt_caching": true,
|
||||
"supports_response_schema": true
|
||||
},
|
||||
"eu.amazon.nova-pro-v1:0": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 300000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.00000105,
|
||||
"output_cost_per_token": 0.0000042,
|
||||
"litellm_provider": "bedrock_converse",
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_response_schema": true,
|
||||
"source": "https://aws.amazon.com/bedrock/pricing/"
|
||||
},
|
||||
"anthropic.claude-3-sonnet-20240229-v1:0": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 200000,
|
||||
|
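The `eu.amazon.nova-*` entries mirror the existing US Nova models for EU Bedrock regions on the Converse route. A hypothetical call using the Lite entry; AWS credentials, region, and model access are assumptions, not confirmed by the diff:

```python
import litellm

# Hypothetical call using the newly added EU Nova Lite entry via the Bedrock
# Converse route; assumes AWS credentials and EU model access are configured.
response = litellm.completion(
    model="bedrock/eu.amazon.nova-lite-v1:0",
    messages=[{"role": "user", "content": "Summarize LiteLLM in one sentence."}],
    aws_region_name="eu-central-1",  # illustrative region
)
print(response.choices[0].message.content)
```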
|
|
@ -0,0 +1 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{32922:function(e,t,n){Promise.resolve().then(n.bind(n,12011))},12011:function(e,t,n){"use strict";n.r(t),n.d(t,{default:function(){return S}});var s=n(57437),o=n(2265),a=n(99376),i=n(20831),c=n(94789),l=n(12514),r=n(49804),u=n(67101),d=n(84264),m=n(49566),h=n(96761),x=n(84566),p=n(19250),f=n(14474),k=n(13634),j=n(73002),g=n(3914);function S(){let[e]=k.Z.useForm(),t=(0,a.useSearchParams)();(0,g.e)("token");let n=t.get("invitation_id"),[S,_]=(0,o.useState)(null),[w,Z]=(0,o.useState)(""),[N,b]=(0,o.useState)(""),[T,v]=(0,o.useState)(null),[y,E]=(0,o.useState)(""),[C,U]=(0,o.useState)("");return(0,o.useEffect)(()=>{n&&(0,p.W_)(n).then(e=>{let t=e.login_url;console.log("login_url:",t),E(t);let n=e.token,s=(0,f.o)(n);U(n),console.log("decoded:",s),_(s.key),console.log("decoded user email:",s.user_email),b(s.user_email),v(s.user_id)})},[n]),(0,s.jsx)("div",{className:"mx-auto w-full max-w-md mt-10",children:(0,s.jsxs)(l.Z,{children:[(0,s.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,s.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,s.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,s.jsx)(c.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,s.jsxs)(u.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,s.jsx)(r.Z,{children:"SSO is under the Enterprise Tirer."}),(0,s.jsx)(r.Z,{children:(0,s.jsx)(i.Z,{variant:"primary",className:"mb-2",children:(0,s.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,s.jsxs)(k.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",S,"token:",C,"formValues:",e),S&&C&&(e.user_email=N,T&&n&&(0,p.m_)(S,n,T,e.password).then(e=>{var t;let n="/ui/";n+="?userID="+((null===(t=e.data)||void 0===t?void 0:t.user_id)||e.user_id),document.cookie="token="+C,console.log("redirecting to:",n),window.location.href=n}))},children:[(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)(k.Z.Item,{label:"Email Address",name:"user_email",children:(0,s.jsx)(m.Z,{type:"email",disabled:!0,value:N,defaultValue:N,className:"max-w-md"})}),(0,s.jsx)(k.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,s.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,s.jsx)("div",{className:"mt-10",children:(0,s.jsx)(j.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}},3914:function(e,t,n){"use strict";function s(){let e=window.location.hostname,t=["Lax","Strict","None"];["/","/ui"].forEach(n=>{document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,";"),document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,"; domain=").concat(e,";"),t.forEach(t=>{let s="None"===t?" Secure;":"";document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,"; SameSite=").concat(t,";").concat(s),document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,"; domain=").concat(e,"; SameSite=").concat(t,";").concat(s)})}),console.log("After clearing cookies:",document.cookie)}function o(e){let t=document.cookie.split("; ").find(t=>t.startsWith(e+"="));return t?t.split("=")[1]:null}n.d(t,{b:function(){return s},e:function(){return o}})}},function(e){e.O(0,[665,441,899,250,971,117,744],function(){return e(e.s=32922)}),_N_E=e.O()}]);
|
|
@ -1 +0,0 @@
|
|||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{32922:function(e,s,t){Promise.resolve().then(t.bind(t,12011))},12011:function(e,s,t){"use strict";t.r(s),t.d(s,{default:function(){return g}});var l=t(57437),n=t(2265),a=t(99376),i=t(20831),r=t(94789),o=t(12514),c=t(49804),u=t(67101),d=t(84264),m=t(49566),h=t(96761),x=t(84566),f=t(19250),p=t(14474),j=t(13634),_=t(73002);function g(){let[e]=j.Z.useForm(),s=(0,a.useSearchParams)();!function(e){console.log("COOKIES",document.cookie);let s=document.cookie.split("; ").find(s=>s.startsWith(e+"="));s&&s.split("=")[1]}("token");let t=s.get("invitation_id"),[g,Z]=(0,n.useState)(null),[k,w]=(0,n.useState)(""),[S,b]=(0,n.useState)(""),[N,v]=(0,n.useState)(null),[y,E]=(0,n.useState)(""),[I,O]=(0,n.useState)("");return(0,n.useEffect)(()=>{t&&(0,f.W_)(t).then(e=>{let s=e.login_url;console.log("login_url:",s),E(s);let t=e.token,l=(0,p.o)(t);O(t),console.log("decoded:",l),Z(l.key),console.log("decoded user email:",l.user_email),b(l.user_email),v(l.user_id)})},[t]),(0,l.jsx)("div",{className:"mx-auto w-full max-w-md mt-10",children:(0,l.jsxs)(o.Z,{children:[(0,l.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,l.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,l.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,l.jsx)(r.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,l.jsxs)(u.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,l.jsx)(c.Z,{children:"SSO is under the Enterprise Tirer."}),(0,l.jsx)(c.Z,{children:(0,l.jsx)(i.Z,{variant:"primary",className:"mb-2",children:(0,l.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,l.jsxs)(j.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",g,"token:",I,"formValues:",e),g&&I&&(e.user_email=S,N&&t&&(0,f.m_)(g,t,N,e.password).then(e=>{var s;let t="/ui/";t+="?userID="+((null===(s=e.data)||void 0===s?void 0:s.user_id)||e.user_id),document.cookie="token="+I,console.log("redirecting to:",t),window.location.href=t}))},children:[(0,l.jsxs)(l.Fragment,{children:[(0,l.jsx)(j.Z.Item,{label:"Email Address",name:"user_email",children:(0,l.jsx)(m.Z,{type:"email",disabled:!0,value:S,defaultValue:S,className:"max-w-md"})}),(0,l.jsx)(j.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,l.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,l.jsx)("div",{className:"mt-10",children:(0,l.jsx)(_.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}}},function(e){e.O(0,[665,441,899,250,971,117,744],function(){return e(e.s=32922)}),_N_E=e.O()}]);
|
|
@ -1 +1 @@
|
|||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/f41c66e22715ab00.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[92222,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"441\",\"static/chunks/441-79926bf2b9d89e04.js\",\"261\",\"static/chunks/261-cb27c20c4f8ec4c6.js\",\"899\",\"static/chunks/899-354f59ecde307dfa.js\",\"914\",\"static/chunks/914-000d10374f86fc1a.js\",\"250\",\"static/chunks/250-51513f2f6dabf571.js\",\"699\",\"static/chunks/699-6b82f8e7b98ca1a3.js\",\"931\",\"static/chunks/app/page-e28453cd004ff93c.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"sW550-yvC4l9ZFA0scEUc\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f41c66e22715ab00.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
|
||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/f41c66e22715ab00.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[92222,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"441\",\"static/chunks/441-79926bf2b9d89e04.js\",\"261\",\"static/chunks/261-e48c2ac6ff0b811c.js\",\"899\",\"static/chunks/899-354f59ecde307dfa.js\",\"914\",\"static/chunks/914-e17acab83d0eadb5.js\",\"250\",\"static/chunks/250-51513f2f6dabf571.js\",\"699\",\"static/chunks/699-6b82f8e7b98ca1a3.js\",\"931\",\"static/chunks/app/page-b36633214e76cfd1.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"rCxUxULLkHhl5KoPY9DHv\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f41c66e22715ab00.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
|||
2:I[19107,[],"ClientPageRoot"]
|
||||
3:I[92222,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","899","static/chunks/899-354f59ecde307dfa.js","914","static/chunks/914-000d10374f86fc1a.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","931","static/chunks/app/page-e28453cd004ff93c.js"],"default",1]
|
||||
3:I[92222,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-e48c2ac6ff0b811c.js","899","static/chunks/899-354f59ecde307dfa.js","914","static/chunks/914-e17acab83d0eadb5.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","931","static/chunks/app/page-b36633214e76cfd1.js"],"default",1]
|
||||
4:I[4707,[],""]
|
||||
5:I[36423,[],""]
|
||||
0:["sW550-yvC4l9ZFA0scEUc",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
|
||||
0:["rCxUxULLkHhl5KoPY9DHv",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
2:I[19107,[],"ClientPageRoot"]
|
||||
3:I[52829,["441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-cb27c20c4f8ec4c6.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","418","static/chunks/app/model_hub/page-6f97b95f1023b0e9.js"],"default",1]
|
||||
3:I[52829,["441","static/chunks/441-79926bf2b9d89e04.js","261","static/chunks/261-e48c2ac6ff0b811c.js","250","static/chunks/250-51513f2f6dabf571.js","699","static/chunks/699-6b82f8e7b98ca1a3.js","418","static/chunks/app/model_hub/page-6f97b95f1023b0e9.js"],"default",1]
|
||||
4:I[4707,[],""]
|
||||
5:I[36423,[],""]
|
||||
0:["sW550-yvC4l9ZFA0scEUc",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
|
||||
0:["rCxUxULLkHhl5KoPY9DHv",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
2:I[19107,[],"ClientPageRoot"]
|
||||
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","441","static/chunks/441-79926bf2b9d89e04.js","899","static/chunks/899-354f59ecde307dfa.js","250","static/chunks/250-51513f2f6dabf571.js","461","static/chunks/app/onboarding/page-f2e9aa9e77b66520.js"],"default",1]
|
||||
3:I[12011,["665","static/chunks/3014691f-0b72c78cfebbd712.js","441","static/chunks/441-79926bf2b9d89e04.js","899","static/chunks/899-354f59ecde307dfa.js","250","static/chunks/250-51513f2f6dabf571.js","461","static/chunks/app/onboarding/page-a31bc08c35f01c0a.js"],"default",1]
|
||||
4:I[4707,[],""]
|
||||
5:I[36423,[],""]
|
||||
0:["sW550-yvC4l9ZFA0scEUc",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
|
||||
0:["rCxUxULLkHhl5KoPY9DHv",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/f41c66e22715ab00.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -1,34 +1,4 @@
|
|||
model_list:
|
||||
- model_name: claude-3.7
|
||||
- model_name: llama3.2-vision
|
||||
litellm_params:
|
||||
model: openai/gpt-3.5-turbo
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
api_base: http://0.0.0.0:8090
|
||||
- model_name: deepseek-r1
|
||||
litellm_params:
|
||||
model: bedrock/deepseek_r1/arn:aws:bedrock:us-west-2:888602223428:imported-model/bnnr6463ejgf
|
||||
- model_name: deepseek-r1-api
|
||||
litellm_params:
|
||||
model: deepseek/deepseek-reasoner
|
||||
- model_name: cohere.embed-english-v3
|
||||
litellm_params:
|
||||
model: bedrock/cohere.embed-english-v3
|
||||
api_key: os.environ/COHERE_API_KEY
|
||||
- model_name: bedrock-claude-3-7
|
||||
litellm_params:
|
||||
model: bedrock/invoke/us.anthropic.claude-3-7-sonnet-20250219-v1:0
|
||||
- model_name: bedrock-claude-3-5-sonnet
|
||||
litellm_params:
|
||||
model: bedrock/invoke/us.anthropic.claude-3-5-sonnet-20240620-v1:0
|
||||
- model_name: bedrock-nova
|
||||
litellm_params:
|
||||
model: bedrock/us.amazon.nova-pro-v1:0
|
||||
- model_name: gpt-4o
|
||||
litellm_params:
|
||||
model: openai/gpt-4o
|
||||
|
||||
litellm_settings:
|
||||
cache: true
|
||||
cache_params: # set cache params for redis
|
||||
type: redis
|
||||
namespace: "litellm.caching"
|
||||
model: ollama/llama3.2-vision
|
|
@ -19,6 +19,7 @@ from litellm.types.integrations.slack_alerting import AlertType
|
|||
from litellm.types.llms.openai import AllMessageValues
|
||||
from litellm.types.router import RouterErrors, UpdateRouterConfig
|
||||
from litellm.types.utils import (
|
||||
CallTypes,
|
||||
EmbeddingResponse,
|
||||
GenericBudgetConfigType,
|
||||
ImageResponse,
|
||||
|
@ -664,6 +665,7 @@ class RegenerateKeyRequest(GenerateKeyRequest):
|
|||
duration: Optional[str] = None
|
||||
spend: Optional[float] = None
|
||||
metadata: Optional[dict] = None
|
||||
new_master_key: Optional[str] = None
|
||||
|
||||
|
||||
class KeyRequest(LiteLLMPydanticObjectBase):
|
||||
|
@ -688,6 +690,30 @@ class LiteLLM_ModelTable(LiteLLMPydanticObjectBase):
|
|||
model_config = ConfigDict(protected_namespaces=())
|
||||
|
||||
|
||||
class LiteLLM_ProxyModelTable(LiteLLMPydanticObjectBase):
|
||||
model_id: str
|
||||
model_name: str
|
||||
litellm_params: dict
|
||||
model_info: dict
|
||||
created_by: str
|
||||
updated_by: str
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
def check_potential_json_str(cls, values):
|
||||
if isinstance(values.get("litellm_params"), str):
|
||||
try:
|
||||
values["litellm_params"] = json.loads(values["litellm_params"])
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
if isinstance(values.get("model_info"), str):
|
||||
try:
|
||||
values["model_info"] = json.loads(values["model_info"])
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
return values
|
||||
|
||||
|
||||
class NewUserRequest(GenerateRequestBase):
|
||||
max_budget: Optional[float] = None
|
||||
user_email: Optional[str] = None
|
||||
|
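`LiteLLM_ProxyModelTable.check_potential_json_str` lets rows whose `litellm_params` / `model_info` come back from the DB as JSON strings pass validation. A minimal sketch, assuming the class is importable from `litellm.proxy._types` alongside the surrounding models; the field values are made up:

```python
from litellm.proxy._types import LiteLLM_ProxyModelTable

# The new validator parses litellm_params / model_info that arrive as JSON
# strings into dicts before validation. Field values here are illustrative.
row = LiteLLM_ProxyModelTable(
    model_id="mid-123",
    model_name="gpt-4o",
    litellm_params='{"model": "openai/gpt-4o"}',  # JSON string, not a dict
    model_info='{"team_id": "team-1"}',
    created_by="admin",
    updated_by="admin",
)
assert isinstance(row.litellm_params, dict)
assert row.model_info["team_id"] == "team-1"
```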
@ -1003,6 +1029,7 @@ class AddTeamCallback(LiteLLMPydanticObjectBase):
|
|||
class TeamCallbackMetadata(LiteLLMPydanticObjectBase):
|
||||
success_callback: Optional[List[str]] = []
|
||||
failure_callback: Optional[List[str]] = []
|
||||
callbacks: Optional[List[str]] = []
|
||||
# for now - only supported for langfuse
|
||||
callback_vars: Optional[Dict[str, str]] = {}
|
||||
|
||||
|
@ -1015,6 +1042,9 @@ class TeamCallbackMetadata(LiteLLMPydanticObjectBase):
|
|||
failure_callback = values.get("failure_callback", [])
|
||||
if failure_callback is None:
|
||||
values.pop("failure_callback", None)
|
||||
callbacks = values.get("callbacks", [])
|
||||
if callbacks is None:
|
||||
values.pop("callbacks", None)
|
||||
|
||||
callback_vars = values.get("callback_vars", {})
|
||||
if callback_vars is None:
|
||||
|
@ -1023,6 +1053,7 @@ class TeamCallbackMetadata(LiteLLMPydanticObjectBase):
|
|||
return {
|
||||
"success_callback": [],
|
||||
"failure_callback": [],
|
||||
"callbacks": [],
|
||||
"callback_vars": {},
|
||||
}
|
||||
valid_keys = set(StandardCallbackDynamicParams.__annotations__.keys())
|
||||
|
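The `TeamCallbackMetadata` validator above strips `None` entries (and falls back to empty defaults when everything is `None`) so the declared defaults apply. A minimal sketch of that behavior; the input dict is illustrative:

```python
from litellm.proxy._types import TeamCallbackMetadata

# None entries are dropped by the before-validator, so the declared
# defaults ([] / {}) take effect instead of None values.
meta = TeamCallbackMetadata(**{"failure_callback": None, "callbacks": None})
assert meta.failure_callback == []
assert meta.callbacks == []
```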
@ -1156,6 +1187,13 @@ class KeyManagementSettings(LiteLLMPydanticObjectBase):
|
|||
Access mode for the secret manager; when set to write_only, the secret manager is only used for writing secrets
|
||||
"""
|
||||
|
||||
primary_secret_name: Optional[str] = None
|
||||
"""
|
||||
If set, will read secrets from this primary secret in the secret manager
|
||||
|
||||
eg. on AWS you can store multiple secret values as K/V pairs in a single secret
|
||||
"""
|
||||
|
||||
|
||||
class TeamDefaultSettings(LiteLLMPydanticObjectBase):
|
||||
team_id: str
|
||||
|
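`primary_secret_name` enables the pattern described in the docstring: one secret in the manager holds many K/V pairs. A self-contained sketch of the lookup idea; the payload and key names are hypothetical:

```python
import json

# Sketch of the "primary secret" pattern: a single secret in the manager stores
# many key/value pairs, and individual secrets are read out of it locally
# instead of making one secret-manager call per key.
primary_secret_payload = '{"OPENAI_API_KEY": "sk-...", "ANTHROPIC_API_KEY": "sk-ant-..."}'
kv_pairs = json.loads(primary_secret_payload)

def read_secret(secret_name: str):
    return kv_pairs.get(secret_name)

print(read_secret("ANTHROPIC_API_KEY") is not None)
```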
@ -1859,6 +1897,7 @@ class SpendLogsMetadata(TypedDict):
|
|||
applied_guardrails: Optional[List[str]]
|
||||
status: StandardLoggingPayloadStatus
|
||||
proxy_server_request: Optional[str]
|
||||
batch_models: Optional[List[str]]
|
||||
error_information: Optional[StandardLoggingPayloadErrorInformation]
|
||||
|
||||
|
||||
|
@ -1958,7 +1997,7 @@ class ProxyException(Exception):
|
|||
code: Optional[Union[int, str]] = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
self.message = message
|
||||
self.message = str(message)
|
||||
self.type = type
|
||||
self.param = param
|
||||
|
||||
|
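Coercing `self.message` to `str(message)` guards against non-string error payloads. A minimal sketch, assuming `ProxyException` accepts `message`, `type`, `param`, and `code` as the hunk suggests; the payload is made up:

```python
from litellm.proxy._types import ProxyException

# Upstream errors sometimes arrive as dicts; downstream serialization expects
# message to be a string, which the str() coercion now guarantees.
err = ProxyException(
    message={"error": "upstream failure"},  # non-string payload
    type="internal_error",
    param="None",
    code=500,
)
assert isinstance(err.message, str)
```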
@ -2542,3 +2581,8 @@ class PrismaCompatibleUpdateDBModel(TypedDict, total=False):
|
|||
|
||||
class SpecialManagementEndpointEnums(enum.Enum):
|
||||
DEFAULT_ORGANIZATION = "default_organization"
|
||||
|
||||
|
||||
class TransformRequestBody(BaseModel):
|
||||
call_type: CallTypes
|
||||
request_body: dict
|
||||
|
|
252
litellm/proxy/anthropic_endpoints/endpoints.py
Normal file
|
@ -0,0 +1,252 @@
|
|||
"""
|
||||
Unified /v1/messages endpoint - (Anthropic Spec)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import time
|
||||
import traceback
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request, Response, status
|
||||
from fastapi.responses import StreamingResponse
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.proxy._types import *
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
|
||||
from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
|
||||
from litellm.proxy.utils import ProxyLogging
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
async def async_data_generator_anthropic(
|
||||
response,
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
request_data: dict,
|
||||
proxy_logging_obj: ProxyLogging,
|
||||
):
|
||||
verbose_proxy_logger.debug("inside generator")
|
||||
try:
|
||||
time.time()
|
||||
async for chunk in response:
|
||||
verbose_proxy_logger.debug(
|
||||
"async_data_generator: received streaming chunk - {}".format(chunk)
|
||||
)
|
||||
### CALL HOOKS ### - modify outgoing data
|
||||
chunk = await proxy_logging_obj.async_post_call_streaming_hook(
|
||||
user_api_key_dict=user_api_key_dict, response=chunk
|
||||
)
|
||||
|
||||
yield chunk
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.exception(
|
||||
"litellm.proxy.proxy_server.async_data_generator(): Exception occured - {}".format(
|
||||
str(e)
|
||||
)
|
||||
)
|
||||
await proxy_logging_obj.post_call_failure_hook(
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
original_exception=e,
|
||||
request_data=request_data,
|
||||
)
|
||||
verbose_proxy_logger.debug(
|
||||
f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`"
|
||||
)
|
||||
|
||||
if isinstance(e, HTTPException):
|
||||
raise e
|
||||
else:
|
||||
error_traceback = traceback.format_exc()
|
||||
error_msg = f"{str(e)}\n\n{error_traceback}"
|
||||
|
||||
proxy_exception = ProxyException(
|
||||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
)
|
||||
error_returned = json.dumps({"error": proxy_exception.to_dict()})
|
||||
yield f"data: {error_returned}\n\n"
|
||||
|
||||
|
||||
@router.post(
|
||||
"/v1/messages",
|
||||
tags=["[beta] Anthropic `/v1/messages`"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def anthropic_response( # noqa: PLR0915
|
||||
fastapi_response: Response,
|
||||
request: Request,
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
):
|
||||
"""
|
||||
Use `{PROXY_BASE_URL}/anthropic/v1/messages` instead - [Docs](https://docs.litellm.ai/docs/anthropic_completion).
|
||||
|
||||
This was a BETA endpoint that calls 100+ LLMs in the anthropic format.
|
||||
"""
|
||||
from litellm.proxy.proxy_server import (
|
||||
general_settings,
|
||||
get_custom_headers,
|
||||
llm_router,
|
||||
proxy_config,
|
||||
proxy_logging_obj,
|
||||
user_api_base,
|
||||
user_max_tokens,
|
||||
user_model,
|
||||
user_request_timeout,
|
||||
user_temperature,
|
||||
version,
|
||||
)
|
||||
|
||||
request_data = await _read_request_body(request=request)
|
||||
data: dict = {**request_data}
|
||||
try:
|
||||
data["model"] = (
|
||||
general_settings.get("completion_model", None) # server default
|
||||
or user_model # model name passed via cli args
|
||||
or data.get("model", None) # default passed in http request
|
||||
)
|
||||
if user_model:
|
||||
data["model"] = user_model
|
||||
|
||||
data = await add_litellm_data_to_request(
|
||||
data=data, # type: ignore
|
||||
request=request,
|
||||
general_settings=general_settings,
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
version=version,
|
||||
proxy_config=proxy_config,
|
||||
)
|
||||
|
||||
# override with user settings, these are params passed via cli
|
||||
if user_temperature:
|
||||
data["temperature"] = user_temperature
|
||||
if user_request_timeout:
|
||||
data["request_timeout"] = user_request_timeout
|
||||
if user_max_tokens:
|
||||
data["max_tokens"] = user_max_tokens
|
||||
if user_api_base:
|
||||
data["api_base"] = user_api_base
|
||||
|
||||
### MODEL ALIAS MAPPING ###
|
||||
# check if model name in model alias map
|
||||
# get the actual model name
|
||||
if data["model"] in litellm.model_alias_map:
|
||||
data["model"] = litellm.model_alias_map[data["model"]]
|
||||
|
||||
### CALL HOOKS ### - modify incoming data before calling the model
|
||||
data = await proxy_logging_obj.pre_call_hook( # type: ignore
|
||||
user_api_key_dict=user_api_key_dict, data=data, call_type="text_completion"
|
||||
)
|
||||
|
||||
### ROUTE THE REQUESTs ###
|
||||
router_model_names = llm_router.model_names if llm_router is not None else []
|
||||
|
||||
# skip router if user passed their key
|
||||
if (
|
||||
llm_router is not None and data["model"] in router_model_names
|
||||
): # model in router model list
|
||||
llm_response = asyncio.create_task(llm_router.aanthropic_messages(**data))
|
||||
elif (
|
||||
llm_router is not None
|
||||
and llm_router.model_group_alias is not None
|
||||
and data["model"] in llm_router.model_group_alias
|
||||
): # model set in model_group_alias
|
||||
llm_response = asyncio.create_task(llm_router.aanthropic_messages(**data))
|
||||
elif (
|
||||
llm_router is not None and data["model"] in llm_router.deployment_names
|
||||
): # model in router deployments, calling a specific deployment on the router
|
||||
llm_response = asyncio.create_task(
|
||||
llm_router.aanthropic_messages(**data, specific_deployment=True)
|
||||
)
|
||||
elif (
|
||||
llm_router is not None and data["model"] in llm_router.get_model_ids()
|
||||
): # model in router model list
|
||||
llm_response = asyncio.create_task(llm_router.aanthropic_messages(**data))
|
||||
elif (
|
||||
llm_router is not None
|
||||
and data["model"] not in router_model_names
|
||||
and (
|
||||
llm_router.default_deployment is not None
|
||||
or len(llm_router.pattern_router.patterns) > 0
|
||||
)
|
||||
): # model in router deployments, calling a specific deployment on the router
|
||||
llm_response = asyncio.create_task(llm_router.aanthropic_messages(**data))
|
||||
elif user_model is not None: # `litellm --model <your-model-name>`
|
||||
llm_response = asyncio.create_task(litellm.anthropic_messages(**data))
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail={
|
||||
"error": "completion: Invalid model name passed in model="
|
||||
+ data.get("model", "")
|
||||
},
|
||||
)
|
||||
|
||||
# Await the llm_response task
|
||||
response = await llm_response
|
||||
|
||||
hidden_params = getattr(response, "_hidden_params", {}) or {}
|
||||
model_id = hidden_params.get("model_id", None) or ""
|
||||
cache_key = hidden_params.get("cache_key", None) or ""
|
||||
api_base = hidden_params.get("api_base", None) or ""
|
||||
response_cost = hidden_params.get("response_cost", None) or ""
|
||||
|
||||
### ALERTING ###
|
||||
asyncio.create_task(
|
||||
proxy_logging_obj.update_request_status(
|
||||
litellm_call_id=data.get("litellm_call_id", ""), status="success"
|
||||
)
|
||||
)
|
||||
|
||||
verbose_proxy_logger.debug("final response: %s", response)
|
||||
|
||||
fastapi_response.headers.update(
|
||||
get_custom_headers(
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
model_id=model_id,
|
||||
cache_key=cache_key,
|
||||
api_base=api_base,
|
||||
version=version,
|
||||
response_cost=response_cost,
|
||||
request_data=data,
|
||||
hidden_params=hidden_params,
|
||||
)
|
||||
)
|
||||
|
||||
if (
|
||||
"stream" in data and data["stream"] is True
|
||||
): # use generate_responses to stream responses
|
||||
selected_data_generator = async_data_generator_anthropic(
|
||||
response=response,
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
request_data=data,
|
||||
proxy_logging_obj=proxy_logging_obj,
|
||||
)
|
||||
|
||||
return StreamingResponse(
|
||||
selected_data_generator, # type: ignore
|
||||
media_type="text/event-stream",
|
||||
)
|
||||
|
||||
verbose_proxy_logger.info("\nResponse from Litellm:\n{}".format(response))
|
||||
return response
|
||||
except Exception as e:
|
||||
await proxy_logging_obj.post_call_failure_hook(
|
||||
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
|
||||
)
|
||||
verbose_proxy_logger.exception(
|
||||
"litellm.proxy.proxy_server.anthropic_response(): Exception occured - {}".format(
|
||||
str(e)
|
||||
)
|
||||
)
|
||||
error_msg = f"{str(e)}"
|
||||
raise ProxyException(
|
||||
message=getattr(e, "message", error_msg),
|
||||
type=getattr(e, "type", "None"),
|
||||
param=getattr(e, "param", "None"),
|
||||
code=getattr(e, "status_code", 500),
|
||||
)
|
|
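The routing branches in `anthropic_response` follow a fixed precedence before falling back to the CLI model or erroring. A condensed sketch of that order; this helper does not exist in the codebase, and the attribute names simply mirror the endpoint code above:

```python
# Condensed sketch of the routing precedence implemented in anthropic_response.
# Attribute names mirror the endpoint code; this helper is illustrative only.
def pick_route(model: str, llm_router, user_model=None) -> str:
    if llm_router is not None:
        if model in llm_router.model_names:
            return "router: model group"
        if llm_router.model_group_alias and model in llm_router.model_group_alias:
            return "router: model group alias"
        if model in llm_router.deployment_names:
            return "router: specific deployment"
        if model in llm_router.get_model_ids():
            return "router: model id"
        if llm_router.default_deployment is not None or len(llm_router.pattern_router.patterns) > 0:
            return "router: default deployment / wildcard pattern"
    if user_model is not None:
        return "direct: litellm.anthropic_messages (cli --model)"
    raise ValueError(f"Invalid model name passed in model={model}")
```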
@ -786,6 +786,13 @@ async def _user_api_key_auth_builder( # noqa: PLR0915
|
|||
)
|
||||
valid_token = None
|
||||
|
||||
if valid_token is None:
|
||||
raise Exception(
|
||||
"Invalid proxy server token passed. Received API Key (hashed)={}. Unable to find token in cache or `LiteLLM_VerificationTokenTable`".format(
|
||||
api_key
|
||||
)
|
||||
)
|
||||
|
||||
user_obj: Optional[LiteLLM_UserTable] = None
|
||||
valid_token_dict: dict = {}
|
||||
if valid_token is not None:
|
||||
|
|
|
@ -2,10 +2,10 @@
|
|||
|
||||
# /v1/batches Endpoints
|
||||
|
||||
import asyncio
|
||||
|
||||
######################################################################
|
||||
from typing import Dict, Optional
|
||||
import asyncio
|
||||
from typing import Dict, Optional, cast
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Path, Request, Response
|
||||
|
||||
|
@ -199,8 +199,11 @@ async def retrieve_batch(
|
|||
```
|
||||
"""
|
||||
from litellm.proxy.proxy_server import (
|
||||
add_litellm_data_to_request,
|
||||
general_settings,
|
||||
get_custom_headers,
|
||||
llm_router,
|
||||
proxy_config,
|
||||
proxy_logging_obj,
|
||||
version,
|
||||
)
|
||||
|
@ -212,6 +215,23 @@ async def retrieve_batch(
|
|||
batch_id=batch_id,
|
||||
)
|
||||
|
||||
data = cast(dict, _retrieve_batch_request)
|
||||
|
||||
# setup logging
|
||||
data["litellm_call_id"] = request.headers.get(
|
||||
"x-litellm-call-id", str(uuid.uuid4())
|
||||
)
|
||||
|
||||
# Include original request and headers in the data
|
||||
data = await add_litellm_data_to_request(
|
||||
data=data,
|
||||
request=request,
|
||||
general_settings=general_settings,
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
version=version,
|
||||
proxy_config=proxy_config,
|
||||
)
|
||||
|
||||
if litellm.enable_loadbalancing_on_batch_endpoints is True:
|
||||
if llm_router is None:
|
||||
raise HTTPException(
|
||||
|
@ -221,7 +241,7 @@ async def retrieve_batch(
|
|||
},
|
||||
)
|
||||
|
||||
response = await llm_router.aretrieve_batch(**_retrieve_batch_request) # type: ignore
|
||||
response = await llm_router.aretrieve_batch(**data) # type: ignore
|
||||
else:
|
||||
custom_llm_provider = (
|
||||
provider
|
||||
|
@ -229,7 +249,7 @@ async def retrieve_batch(
|
|||
or "openai"
|
||||
)
|
||||
response = await litellm.aretrieve_batch(
|
||||
custom_llm_provider=custom_llm_provider, **_retrieve_batch_request # type: ignore
|
||||
custom_llm_provider=custom_llm_provider, **data # type: ignore
|
||||
)
|
||||
|
||||
### ALERTING ###
|
||||
|
|
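`retrieve_batch` now seeds `litellm_call_id` from the inbound `x-litellm-call-id` header (or a fresh UUID) before enriching the request via `add_litellm_data_to_request`, and both the router and direct branches consume the enriched `data` dict. A minimal sketch of the header fallback:

```python
import uuid

# Minimal sketch of the new logging setup in retrieve_batch: reuse the inbound
# x-litellm-call-id header when present, otherwise mint a fresh id.
def get_litellm_call_id(headers: dict) -> str:
    return headers.get("x-litellm-call-id", str(uuid.uuid4()))

print(get_litellm_call_id({"x-litellm-call-id": "abc-123"}))  # -> "abc-123"
print(len(get_litellm_call_id({})) > 0)                       # freshly generated uuid
```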
|
@ -1,5 +1,6 @@
|
|||
import base64
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
|
||||
|
@ -19,9 +20,9 @@ def _get_salt_key():
|
|||
return salt_key
|
||||
|
||||
|
||||
def encrypt_value_helper(value: str):
|
||||
def encrypt_value_helper(value: str, new_encryption_key: Optional[str] = None):
|
||||
|
||||
signing_key = _get_salt_key()
|
||||
signing_key = new_encryption_key or _get_salt_key()
|
||||
|
||||
try:
|
||||
if isinstance(value, str):
|
||||
|
|
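The `new_encryption_key` parameter lets callers encrypt with a key other than the current salt key, e.g. during key rotation. A rough sketch, written as if appended to this module; `decrypt_value_helper` and its one-argument form are assumptions, not confirmed by the diff:

```python
# Rough key-rotation sketch inside this module: decrypt with the current salt
# key, then re-encrypt under the new master key via the new parameter.
# decrypt_value_helper is assumed to exist alongside encrypt_value_helper.
def rotate_value(stored_ciphertext: str, new_master_key: str) -> str:
    plaintext = decrypt_value_helper(stored_ciphertext)  # uses current salt key
    return encrypt_value_helper(plaintext, new_encryption_key=new_master_key)
```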
|
@ -4,6 +4,26 @@ model_list:
|
|||
model: openai/fake
|
||||
api_key: fake-key
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
- model_name: claude-3-5-sonnet-20241022
|
||||
litellm_params:
|
||||
model: anthropic/claude-3-5-sonnet-20241022
|
||||
api_key: os.environ/ANTHROPIC_API_KEY
|
||||
- model_name: claude-special-alias
|
||||
litellm_params:
|
||||
model: anthropic/claude-3-haiku-20240307
|
||||
api_key: os.environ/ANTHROPIC_API_KEY
|
||||
- model_name: claude-3-5-sonnet-20241022
|
||||
litellm_params:
|
||||
model: anthropic/claude-3-5-sonnet-20241022
|
||||
api_key: os.environ/ANTHROPIC_API_KEY
|
||||
- model_name: claude-3-7-sonnet-20250219
|
||||
litellm_params:
|
||||
model: anthropic/claude-3-7-sonnet-20250219
|
||||
api_key: os.environ/ANTHROPIC_API_KEY
|
||||
- model_name: anthropic/*
|
||||
litellm_params:
|
||||
model: anthropic/*
|
||||
api_key: os.environ/ANTHROPIC_API_KEY
|
||||
general_settings:
|
||||
master_key: sk-1234
|
||||
custom_auth: custom_auth_basic.user_api_key_auth
|
|
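With the config above, the Claude aliases (including `claude-special-alias` and the `anthropic/*` wildcard) are all served through the proxy and guarded by the `custom_auth` hook. A minimal sketch of calling one of those aliases once the proxy is running; the base URL is an assumption, and the key simply reuses the example `master_key` from this config (the custom auth hook may accept different credentials):

```python
# Hedged sketch: call the proxy's OpenAI-compatible endpoint using one of the
# model aliases defined in the config above. Base URL is a placeholder.
import httpx

PROXY_BASE_URL = "http://0.0.0.0:4000"   # assumed local proxy address
PROXY_API_KEY = "sk-1234"                # master_key from the example config

response = httpx.post(
    f"{PROXY_BASE_URL}/chat/completions",
    headers={"Authorization": f"Bearer {PROXY_API_KEY}"},
    json={
        "model": "claude-special-alias",  # resolves to anthropic/claude-3-haiku-20240307
        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
    },
    timeout=30.0,
)
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"])
```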
@ -1,87 +0,0 @@
|
|||
"""
|
||||
Runs when LLM Exceptions occur on LiteLLM Proxy
|
||||
"""
|
||||
|
||||
import copy
|
||||
import json
|
||||
import uuid
|
||||
|
||||
import litellm
|
||||
from litellm.proxy._types import LiteLLM_ErrorLogs
|
||||
|
||||
|
||||
async def _PROXY_failure_handler(
|
||||
kwargs, # kwargs to completion
|
||||
completion_response: litellm.ModelResponse, # response from completion
|
||||
start_time=None,
|
||||
end_time=None, # start/end time for completion
|
||||
):
|
||||
"""
|
||||
Async Failure Handler - runs when LLM Exceptions occur on LiteLLM Proxy.
|
||||
This function logs the errors to the Prisma DB
|
||||
|
||||
Can be disabled by setting the following on proxy_config.yaml:
|
||||
```yaml
|
||||
general_settings:
|
||||
disable_error_logs: True
|
||||
```
|
||||
|
||||
"""
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.proxy.proxy_server import general_settings, prisma_client
|
||||
|
||||
if general_settings.get("disable_error_logs") is True:
|
||||
return
|
||||
|
||||
if prisma_client is not None:
|
||||
verbose_proxy_logger.debug(
|
||||
"inside _PROXY_failure_handler kwargs=", extra=kwargs
|
||||
)
|
||||
|
||||
_exception = kwargs.get("exception")
|
||||
_exception_type = _exception.__class__.__name__
|
||||
_model = kwargs.get("model", None)
|
||||
|
||||
_optional_params = kwargs.get("optional_params", {})
|
||||
_optional_params = copy.deepcopy(_optional_params)
|
||||
|
||||
for k, v in _optional_params.items():
|
||||
v = str(v)
|
||||
v = v[:100]
|
||||
|
||||
_status_code = "500"
|
||||
try:
|
||||
_status_code = str(_exception.status_code)
|
||||
except Exception:
|
||||
# Don't let this fail logging the exception to the dB
|
||||
pass
|
||||
|
||||
_litellm_params = kwargs.get("litellm_params", {}) or {}
|
||||
_metadata = _litellm_params.get("metadata", {}) or {}
|
||||
_model_id = _metadata.get("model_info", {}).get("id", "")
|
||||
_model_group = _metadata.get("model_group", "")
|
||||
api_base = litellm.get_api_base(model=_model, optional_params=_litellm_params)
|
||||
_exception_string = str(_exception)
|
||||
|
||||
error_log = LiteLLM_ErrorLogs(
|
||||
request_id=str(uuid.uuid4()),
|
||||
model_group=_model_group,
|
||||
model_id=_model_id,
|
||||
litellm_model_name=kwargs.get("model"),
|
||||
request_kwargs=_optional_params,
|
||||
api_base=api_base,
|
||||
exception_type=_exception_type,
|
||||
status_code=_status_code,
|
||||
exception_string=_exception_string,
|
||||
startTime=kwargs.get("start_time"),
|
||||
endTime=kwargs.get("end_time"),
|
||||
)
|
||||
|
||||
error_log_dict = error_log.model_dump()
|
||||
error_log_dict["request_kwargs"] = json.dumps(error_log_dict["request_kwargs"])
|
||||
|
||||
await prisma_client.db.litellm_errorlogs.create(
|
||||
data=error_log_dict # type: ignore
|
||||
)
|
||||
|
||||
pass
|
|
@ -34,6 +34,9 @@ class _ProxyDBLogger(CustomLogger):
|
|||
):
|
||||
from litellm.proxy.proxy_server import update_database
|
||||
|
||||
if _ProxyDBLogger._should_track_errors_in_db() is False:
|
||||
return
|
||||
|
||||
_metadata = dict(
|
||||
StandardLoggingUserAPIKeyMetadata(
|
||||
user_api_key_hash=user_api_key_dict.api_key,
|
||||
|
@ -202,6 +205,21 @@ class _ProxyDBLogger(CustomLogger):
|
|||
"Error in tracking cost callback - %s", str(e)
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _should_track_errors_in_db():
|
||||
"""
|
||||
Returns True if errors should be tracked in the database
|
||||
|
||||
By default, errors are tracked in the database
|
||||
|
||||
If users want to disable error tracking, they can set the disable_error_logs flag in the general_settings
|
||||
"""
|
||||
from litellm.proxy.proxy_server import general_settings
|
||||
|
||||
if general_settings.get("disable_error_logs") is True:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _should_track_cost_callback(
|
||||
user_api_key: Optional[str],
|
||||
|
|
|
@ -102,11 +102,15 @@ def convert_key_logging_metadata_to_callback(
|
|||
|
||||
if data.callback_name not in team_callback_settings_obj.failure_callback:
|
||||
team_callback_settings_obj.failure_callback.append(data.callback_name)
|
||||
elif data.callback_type == "success_and_failure":
|
||||
elif (
|
||||
not data.callback_type or data.callback_type == "success_and_failure"
|
||||
): # assume 'success_and_failure' = litellm.callbacks
|
||||
if team_callback_settings_obj.success_callback is None:
|
||||
team_callback_settings_obj.success_callback = []
|
||||
if team_callback_settings_obj.failure_callback is None:
|
||||
team_callback_settings_obj.failure_callback = []
|
||||
if team_callback_settings_obj.callbacks is None:
|
||||
team_callback_settings_obj.callbacks = []
|
||||
|
||||
if data.callback_name not in team_callback_settings_obj.success_callback:
|
||||
team_callback_settings_obj.success_callback.append(data.callback_name)
|
||||
|
@ -114,6 +118,9 @@ def convert_key_logging_metadata_to_callback(
|
|||
if data.callback_name not in team_callback_settings_obj.failure_callback:
|
||||
team_callback_settings_obj.failure_callback.append(data.callback_name)
|
||||
|
||||
if data.callback_name not in team_callback_settings_obj.callbacks:
|
||||
team_callback_settings_obj.callbacks.append(data.callback_name)
|
||||
|
||||
for var, value in data.callback_vars.items():
|
||||
if team_callback_settings_obj.callback_vars is None:
|
||||
team_callback_settings_obj.callback_vars = {}
|
||||
|
|
|
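The branch above changes the default: a team callback with no explicit `callback_type` is now treated like `success_and_failure`, so its name is appended to the success, failure, and general callback lists. A simplified, self-contained sketch of that branching; plain dicts stand in for the proxy's settings object, so this is illustrative rather than the proxy's real types:

```python
# Hedged sketch of the new default handling for callback_type.
from typing import Optional


def classify(callback_name: str, callback_type: Optional[str]) -> dict:
    # Plain-dict stand-in for TeamCallbackSettings: which lists the callback joins.
    settings: dict = {"success_callback": [], "failure_callback": [], "callbacks": []}
    if callback_type == "success":
        settings["success_callback"].append(callback_name)
    elif callback_type == "failure":
        settings["failure_callback"].append(callback_name)
    elif not callback_type or callback_type == "success_and_failure":
        # Missing callback_type now falls into the same branch as "success_and_failure".
        settings["success_callback"].append(callback_name)
        settings["failure_callback"].append(callback_name)
        settings["callbacks"].append(callback_name)
    return settings


print(classify("langfuse", None))
# {'success_callback': ['langfuse'], 'failure_callback': ['langfuse'], 'callbacks': ['langfuse']}
```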
@ -1,4 +1,12 @@
|
|||
from litellm.proxy._types import LiteLLM_TeamTable, UserAPIKeyAuth
|
||||
from typing import Any, Union
|
||||
|
||||
from litellm.proxy._types import (
|
||||
GenerateKeyRequest,
|
||||
LiteLLM_ManagementEndpoint_MetadataFields_Premium,
|
||||
LiteLLM_TeamTable,
|
||||
UserAPIKeyAuth,
|
||||
)
|
||||
from litellm.proxy.utils import _premium_user_check
|
||||
|
||||
|
||||
def _is_user_team_admin(
|
||||
|
@ -12,3 +20,22 @@ def _is_user_team_admin(
|
|||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _set_object_metadata_field(
|
||||
object_data: Union[LiteLLM_TeamTable, GenerateKeyRequest],
|
||||
field_name: str,
|
||||
value: Any,
|
||||
) -> None:
|
||||
"""
|
||||
Helper function to set metadata fields that require premium user checks
|
||||
|
||||
Args:
|
||||
object_data: The team or key object to modify
|
||||
field_name: Name of the metadata field to set
|
||||
value: Value to set for the field
|
||||
"""
|
||||
if field_name in LiteLLM_ManagementEndpoint_MetadataFields_Premium:
|
||||
_premium_user_check()
|
||||
object_data.metadata = object_data.metadata or {}
|
||||
object_data.metadata[field_name] = value
|
||||
|
|
|
@ -739,6 +739,43 @@ async def user_update(
|
|||
)
|
||||
|
||||
|
||||
async def get_user_key_counts(
|
||||
prisma_client,
|
||||
user_ids: Optional[List[str]] = None,
|
||||
):
|
||||
"""
|
||||
Helper function to get the count of keys for each user using Prisma's count method.
|
||||
|
||||
Args:
|
||||
prisma_client: The Prisma client instance
|
||||
user_ids: List of user IDs to get key counts for
|
||||
|
||||
Returns:
|
||||
Dictionary mapping user_id to key count
|
||||
"""
|
||||
from litellm.constants import UI_SESSION_TOKEN_TEAM_ID
|
||||
|
||||
if not user_ids or len(user_ids) == 0:
|
||||
return {}
|
||||
|
||||
result = {}
|
||||
|
||||
# Get count for each user_id individually
|
||||
for user_id in user_ids:
|
||||
count = await prisma_client.db.litellm_verificationtoken.count(
|
||||
where={
|
||||
"user_id": user_id,
|
||||
"OR": [
|
||||
{"team_id": None},
|
||||
{"team_id": {"not": UI_SESSION_TOKEN_TEAM_ID}},
|
||||
],
|
||||
}
|
||||
)
|
||||
result[user_id] = count
|
||||
|
||||
return result
|
||||
|
||||
|
||||
@router.get(
|
||||
"/user/get_users",
|
||||
tags=["Internal User management"],
|
||||
|
@ -830,14 +867,9 @@ async def get_users(
|
|||
|
||||
# Get key count for each user
|
||||
if users is not None:
|
||||
user_keys = await prisma_client.db.litellm_verificationtoken.group_by(
|
||||
by=["user_id"],
|
||||
count={"user_id": True},
|
||||
where={"user_id": {"in": [user.user_id for user in users]}},
|
||||
user_key_counts = await get_user_key_counts(
|
||||
prisma_client, [user.user_id for user in users]
|
||||
)
|
||||
user_key_counts = {
|
||||
item["user_id"]: item["_count"]["user_id"] for item in user_keys
|
||||
}
|
||||
else:
|
||||
user_key_counts = {}
|
||||
|
||||
|
|
|
@ -35,15 +35,24 @@ from litellm.proxy.auth.auth_checks import (
|
|||
)
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
from litellm.proxy.hooks.key_management_event_hooks import KeyManagementEventHooks
|
||||
from litellm.proxy.management_endpoints.common_utils import _is_user_team_admin
|
||||
from litellm.proxy.management_endpoints.common_utils import (
|
||||
_is_user_team_admin,
|
||||
_set_object_metadata_field,
|
||||
)
|
||||
from litellm.proxy.management_endpoints.model_management_endpoints import (
|
||||
_add_model_to_db,
|
||||
)
|
||||
from litellm.proxy.management_helpers.utils import management_endpoint_wrapper
|
||||
from litellm.proxy.spend_tracking.spend_tracking_utils import _is_master_key
|
||||
from litellm.proxy.utils import (
|
||||
PrismaClient,
|
||||
_hash_token_if_needed,
|
||||
handle_exception_on_proxy,
|
||||
jsonify_object,
|
||||
)
|
||||
from litellm.router import Router
|
||||
from litellm.secret_managers.main import get_secret
|
||||
from litellm.types.router import Deployment
|
||||
from litellm.types.utils import (
|
||||
BudgetConfig,
|
||||
PersonalUIKeyGenerationConfig,
|
||||
|
@ -507,6 +516,17 @@ async def generate_key_fn( # noqa: PLR0915
|
|||
}
|
||||
)
|
||||
_budget_id = getattr(_budget, "budget_id", None)
|
||||
|
||||
# ADD METADATA FIELDS
|
||||
# Set Management Endpoint Metadata Fields
|
||||
for field in LiteLLM_ManagementEndpoint_MetadataFields_Premium:
|
||||
if getattr(data, field) is not None:
|
||||
_set_object_metadata_field(
|
||||
object_data=data,
|
||||
field_name=field,
|
||||
value=getattr(data, field),
|
||||
)
|
||||
|
||||
data_json = data.model_dump(exclude_unset=True, exclude_none=True) # type: ignore
|
||||
|
||||
# if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users
|
||||
|
@ -531,7 +551,8 @@ async def generate_key_fn( # noqa: PLR0915
|
|||
f"Only premium users can add tags to keys. {CommonProxyErrors.not_premium_user.value}"
|
||||
)
|
||||
|
||||
if data_json["metadata"] is None:
|
||||
_metadata = data_json.get("metadata")
|
||||
if not _metadata:
|
||||
data_json["metadata"] = {"tags": data_json["tags"]}
|
||||
else:
|
||||
data_json["metadata"]["tags"] = data_json["tags"]
|
||||
|
@ -1510,14 +1531,98 @@ async def delete_key_aliases(
|
|||
)
|
||||
|
||||
|
||||
async def _rotate_master_key(
|
||||
prisma_client: PrismaClient,
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
current_master_key: str,
|
||||
new_master_key: str,
|
||||
) -> None:
|
||||
"""
|
||||
Rotate the master key
|
||||
|
||||
1. Get the values from the DB
|
||||
- Get models from DB
|
||||
- Get config from DB
|
||||
2. Decrypt the values
|
||||
- ModelTable
|
||||
- [{"model_name": "str", "litellm_params": {}}]
|
||||
- ConfigTable
|
||||
3. Encrypt the values with the new master key
|
||||
4. Update the values in the DB
|
||||
"""
|
||||
from litellm.proxy.proxy_server import proxy_config
|
||||
|
||||
try:
|
||||
models: Optional[List] = (
|
||||
await prisma_client.db.litellm_proxymodeltable.find_many()
|
||||
)
|
||||
except Exception:
|
||||
models = None
|
||||
# 2. process model table
|
||||
if models:
|
||||
decrypted_models = proxy_config.decrypt_model_list_from_db(new_models=models)
|
||||
verbose_proxy_logger.info(
|
||||
"ABLE TO DECRYPT MODELS - len(decrypted_models): %s", len(decrypted_models)
|
||||
)
|
||||
new_models = []
|
||||
for model in decrypted_models:
|
||||
new_model = await _add_model_to_db(
|
||||
model_params=Deployment(**model),
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
prisma_client=prisma_client,
|
||||
new_encryption_key=new_master_key,
|
||||
should_create_model_in_db=False,
|
||||
)
|
||||
if new_model:
|
||||
new_models.append(jsonify_object(new_model.model_dump()))
|
||||
verbose_proxy_logger.info("Resetting proxy model table")
|
||||
await prisma_client.db.litellm_proxymodeltable.delete_many()
|
||||
verbose_proxy_logger.info("Creating %s models", len(new_models))
|
||||
await prisma_client.db.litellm_proxymodeltable.create_many(
|
||||
data=new_models,
|
||||
)
|
||||
# 3. process config table
|
||||
try:
|
||||
config = await prisma_client.db.litellm_config.find_many()
|
||||
except Exception:
|
||||
config = None
|
||||
|
||||
if config:
|
||||
"""If environment_variables is found, decrypt it and encrypt it with the new master key"""
|
||||
environment_variables_dict = {}
|
||||
for c in config:
|
||||
if c.param_name == "environment_variables":
|
||||
environment_variables_dict = c.param_value
|
||||
|
||||
if environment_variables_dict:
|
||||
decrypted_env_vars = proxy_config._decrypt_and_set_db_env_variables(
|
||||
environment_variables=environment_variables_dict
|
||||
)
|
||||
encrypted_env_vars = proxy_config._encrypt_env_variables(
|
||||
environment_variables=decrypted_env_vars,
|
||||
new_encryption_key=new_master_key,
|
||||
)
|
||||
|
||||
if encrypted_env_vars:
|
||||
await prisma_client.db.litellm_config.update(
|
||||
where={"param_name": "environment_variables"},
|
||||
data={"param_value": jsonify_object(encrypted_env_vars)},
|
||||
)
|
||||
|
||||
|
||||
@router.post(
|
||||
"/key/{key:path}/regenerate",
|
||||
tags=["key management"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
@router.post(
|
||||
"/key/regenerate",
|
||||
tags=["key management"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
@management_endpoint_wrapper
|
||||
async def regenerate_key_fn(
|
||||
key: str,
|
||||
key: Optional[str] = None,
|
||||
data: Optional[RegenerateKeyRequest] = None,
|
||||
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||
litellm_changed_by: Optional[str] = Header(
|
||||
|
@ -1575,6 +1680,7 @@ async def regenerate_key_fn(
|
|||
|
||||
from litellm.proxy.proxy_server import (
|
||||
hash_token,
|
||||
master_key,
|
||||
premium_user,
|
||||
prisma_client,
|
||||
proxy_logging_obj,
|
||||
|
@ -1587,7 +1693,9 @@ async def regenerate_key_fn(
|
|||
)
|
||||
|
||||
# Check if key exists, raise exception if key is not in the DB
|
||||
|
||||
key = data.key if data and data.key else key
|
||||
if not key:
|
||||
raise HTTPException(status_code=400, detail={"error": "No key passed in."})
|
||||
### 1. Create New copy that is duplicate of existing key
|
||||
######################################################################
|
||||
|
||||
|
@ -1602,6 +1710,27 @@ async def regenerate_key_fn(
|
|||
detail={"error": "DB not connected. prisma_client is None"},
|
||||
)
|
||||
|
||||
_is_master_key_valid = _is_master_key(api_key=key, _master_key=master_key)
|
||||
|
||||
if master_key is not None and data and _is_master_key_valid:
|
||||
if data.new_master_key is None:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail={"error": "New master key is required."},
|
||||
)
|
||||
await _rotate_master_key(
|
||||
prisma_client=prisma_client,
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
current_master_key=master_key,
|
||||
new_master_key=data.new_master_key,
|
||||
)
|
||||
return GenerateKeyResponse(
|
||||
key=data.new_master_key,
|
||||
token=data.new_master_key,
|
||||
key_name=data.new_master_key,
|
||||
expires=None,
|
||||
)
|
||||
|
||||
if "sk" not in key:
|
||||
hashed_api_key = key
|
||||
else:
|
||||
|
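Taken together, `_rotate_master_key` and the new branch in `regenerate_key_fn` let an admin rotate the proxy master key by calling the regenerate endpoint with the current master key and a `new_master_key` in the body; stored model params and environment variables are re-encrypted with the new key. A hedged client sketch, where the URL and key values are placeholders rather than values from this diff:

```python
# Hedged sketch: rotate the master key via the /key/{key}/regenerate route
# registered above. URL and key values below are assumed placeholders.
import httpx

PROXY_BASE_URL = "http://0.0.0.0:4000"   # assumed proxy address
CURRENT_MASTER_KEY = "sk-1234"           # assumed current master key
NEW_MASTER_KEY = "sk-5678"               # assumed replacement key

response = httpx.post(
    f"{PROXY_BASE_URL}/key/{CURRENT_MASTER_KEY}/regenerate",
    headers={"Authorization": f"Bearer {CURRENT_MASTER_KEY}"},
    json={"new_master_key": NEW_MASTER_KEY},
    timeout=60.0,
)
response.raise_for_status()
print(response.json()["key"])  # should echo the new master key
```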
@ -1683,6 +1812,7 @@ async def regenerate_key_fn(
|
|||
|
||||
return response
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.exception("Error regenerating key: %s", e)
|
||||
raise handle_exception_on_proxy(e)
|
||||
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@ from litellm._logging import verbose_proxy_logger
|
|||
from litellm.constants import LITELLM_PROXY_ADMIN_NAME
|
||||
from litellm.proxy._types import (
|
||||
CommonProxyErrors,
|
||||
LiteLLM_ProxyModelTable,
|
||||
LitellmUserRoles,
|
||||
PrismaCompatibleUpdateDBModel,
|
||||
ProxyErrorTypes,
|
||||
|
@ -227,12 +228,16 @@ async def _add_model_to_db(
|
|||
model_params: Deployment,
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
prisma_client: PrismaClient,
|
||||
):
|
||||
new_encryption_key: Optional[str] = None,
|
||||
should_create_model_in_db: bool = True,
|
||||
) -> Optional[LiteLLM_ProxyModelTable]:
|
||||
# encrypt litellm params #
|
||||
_litellm_params_dict = model_params.litellm_params.dict(exclude_none=True)
|
||||
_orignal_litellm_model_name = model_params.litellm_params.model
|
||||
for k, v in _litellm_params_dict.items():
|
||||
encrypted_value = encrypt_value_helper(value=v)
|
||||
encrypted_value = encrypt_value_helper(
|
||||
value=v, new_encryption_key=new_encryption_key
|
||||
)
|
||||
model_params.litellm_params[k] = encrypted_value
|
||||
_data: dict = {
|
||||
"model_id": model_params.model_info.id,
|
||||
|
@ -246,9 +251,12 @@ async def _add_model_to_db(
|
|||
}
|
||||
if model_params.model_info.id is not None:
|
||||
_data["model_id"] = model_params.model_info.id
|
||||
if should_create_model_in_db:
|
||||
model_response = await prisma_client.db.litellm_proxymodeltable.create(
|
||||
data=_data # type: ignore
|
||||
)
|
||||
else:
|
||||
model_response = LiteLLM_ProxyModelTable(**_data)
|
||||
return model_response
|
||||
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@ import json
|
|||
import traceback
|
||||
import uuid
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any, List, Optional, Tuple, Union, cast
|
||||
from typing import List, Optional, Tuple, Union, cast
|
||||
|
||||
import fastapi
|
||||
from fastapi import APIRouter, Depends, Header, HTTPException, Request, status
|
||||
|
@ -57,7 +57,10 @@ from litellm.proxy.auth.auth_checks import (
|
|||
get_team_object,
|
||||
)
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
from litellm.proxy.management_endpoints.common_utils import _is_user_team_admin
|
||||
from litellm.proxy.management_endpoints.common_utils import (
|
||||
_is_user_team_admin,
|
||||
_set_object_metadata_field,
|
||||
)
|
||||
from litellm.proxy.management_helpers.utils import (
|
||||
add_new_member,
|
||||
management_endpoint_wrapper,
|
||||
|
@ -283,8 +286,8 @@ async def new_team( # noqa: PLR0915
|
|||
# Set Management Endpoint Metadata Fields
|
||||
for field in LiteLLM_ManagementEndpoint_MetadataFields_Premium:
|
||||
if getattr(data, field) is not None:
|
||||
_set_team_metadata_field(
|
||||
team_data=complete_team_data,
|
||||
_set_object_metadata_field(
|
||||
object_data=complete_team_data,
|
||||
field_name=field,
|
||||
value=getattr(data, field),
|
||||
)
|
||||
|
@ -1274,9 +1277,13 @@ async def team_info(
|
|||
)
|
||||
|
||||
try:
|
||||
team_info: BaseModel = await prisma_client.db.litellm_teamtable.find_unique(
|
||||
team_info: Optional[BaseModel] = (
|
||||
await prisma_client.db.litellm_teamtable.find_unique(
|
||||
where={"team_id": team_id}
|
||||
)
|
||||
)
|
||||
if team_info is None:
|
||||
raise Exception
|
||||
except Exception:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
|
@ -1673,23 +1680,6 @@ def _update_team_metadata_field(updated_kv: dict, field_name: str) -> None:
|
|||
updated_kv["metadata"] = {field_name: _value}
|
||||
|
||||
|
||||
def _set_team_metadata_field(
|
||||
team_data: LiteLLM_TeamTable, field_name: str, value: Any
|
||||
) -> None:
|
||||
"""
|
||||
Helper function to set metadata fields that require premium user checks
|
||||
|
||||
Args:
|
||||
team_data: The team data object to modify
|
||||
field_name: Name of the metadata field to set
|
||||
value: Value to set for the field
|
||||
"""
|
||||
if field_name in LiteLLM_ManagementEndpoint_MetadataFields_Premium:
|
||||
_premium_user_check()
|
||||
team_data.metadata = team_data.metadata or {}
|
||||
team_data.metadata[field_name] = value
|
||||
|
||||
|
||||
@router.get(
|
||||
"/team/filter/ui",
|
||||
tags=["team management"],
|
||||
|
|