mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 03:04:13 +00:00)

Litellm 04 05 2025 release notes (#9785)

* docs: update docs
* docs: additional cleanup
* docs(index.md): add initial links
* docs: more doc updates
* docs(index.md): add more links
* docs(files.md): add gemini files API to docs
* docs(index.md): add more docs
* docs: more docs
* docs: update docs

This commit is contained in: parent 52b35cd809, commit 792ee079c2
29 changed files with 777 additions and 172 deletions

@@ -1076,32 +1076,24 @@ print(response)

```

### Tool Calling / Function Calling

See a detailed walkthrough of parallel function calling with litellm [here](https://docs.litellm.ai/docs/completion/function_call)

<Tabs>
<TabItem value="sdk" label="SDK">

```python
# set Azure env variables
import os
import litellm
import json

os.environ['AZURE_API_KEY'] = "" # litellm reads AZURE_API_KEY from .env and sends the request
os.environ['AZURE_API_BASE'] = "https://openai-gpt-4-test-v-1.openai.azure.com/"
os.environ['AZURE_API_VERSION'] = "2023-07-01-preview"

tools = [
    {
        "type": "function",

@@ -1125,7 +1117,7 @@ tools = [

response = litellm.completion(
    model="azure/chatgpt-functioncalling", # model = azure/<your-azure-deployment-name>
    messages=[{"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris?"}],
    tools=tools,
    tool_choice="auto",  # auto is default, but we'll be explicit
)

@@ -1134,8 +1126,49 @@ response_message = response.choices[0].message

tool_calls = response.choices[0].message.tool_calls
print("\nTool Choice:\n", tool_calls)
```
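
From here the usual second step of the tool-calling loop applies: run each requested tool and send the results back so the model can answer in natural language. A minimal sketch (not part of this diff; `get_current_weather` is a hypothetical helper you implement):

```python
messages = [{"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris?"}]
messages.append(response.choices[0].message)  # assistant turn containing the tool calls

for tool_call in tool_calls:
    args = json.loads(tool_call.function.arguments)
    result = get_current_weather(**args)  # hypothetical helper you implement
    messages.append({
        "role": "tool",
        "tool_call_id": tool_call.id,
        "content": result,
    })

final_response = litellm.completion(
    model="azure/chatgpt-functioncalling",
    messages=messages,
)
print(final_response.choices[0].message.content)
```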

</TabItem>
<TabItem value="proxy" label="PROXY">

1. Setup config.yaml

```yaml
model_list:
  - model_name: azure-gpt-3.5
    litellm_params:
      model: azure/chatgpt-functioncalling
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
      api_version: "2023-07-01-preview"
```

2. Start proxy

```bash
litellm --config config.yaml
```

3. Test it

```bash
curl -L -X POST 'http://localhost:4000/v1/chat/completions' \
  -H 'Content-Type: application/json' \
  -H 'Authorization: Bearer sk-1234' \
  -d '{
    "model": "azure-gpt-3.5",
    "messages": [
        {
            "role": "user",
            "content": "Hey, how'\''s it going? Thinking long and hard before replying - what is the meaning of the world and life itself"
        }
    ]
  }'
```

</TabItem>
</Tabs>

### Spend Tracking for Azure OpenAI Models (PROXY)

Set the base model for cost tracking on Azure image-generation calls.

@@ -1,7 +1,7 @@

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Databricks

LiteLLM supports all models on Databricks

@@ -154,7 +154,205 @@ response = completion(

      temperature: 0.5
```

## Usage - Thinking / `reasoning_content`

LiteLLM translates OpenAI's `reasoning_effort` to Anthropic's `thinking` parameter. [Code](https://github.com/BerriAI/litellm/blob/23051d89dd3611a81617d84277059cd88b2df511/litellm/llms/anthropic/chat/transformation.py#L298)

| reasoning_effort | thinking |
| ---------------- | -------- |
| "low" | "budget_tokens": 1024 |
| "medium" | "budget_tokens": 2048 |
| "high" | "budget_tokens": 4096 |
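
Concretely, under this mapping the two calls below should be equivalent (a sketch; it assumes the Databricks env setup shown in the example that follows):

```python
from litellm import completion

# OpenAI-style: "medium" is translated to Anthropic's thinking budget of 2048 tokens
resp = completion(
    model="databricks/databricks-claude-3-7-sonnet",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    reasoning_effort="medium",
)

# Anthropic-style: pass the translated `thinking` parameter directly
resp = completion(
    model="databricks/databricks-claude-3-7-sonnet",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    thinking={"type": "enabled", "budget_tokens": 2048},
)
```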

Known Limitations:
- Support for passing thinking blocks back to Claude [Issue](https://github.com/BerriAI/litellm/issues/9790)

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import completion
import os

# set ENV variables (can also be passed in to .completion() - e.g. `api_base`, `api_key`)
os.environ["DATABRICKS_API_KEY"] = "databricks key"
os.environ["DATABRICKS_API_BASE"] = "databricks base url"

resp = completion(
    model="databricks/databricks-claude-3-7-sonnet",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    reasoning_effort="low",
)
```

</TabItem>

<TabItem value="proxy" label="PROXY">

1. Setup config.yaml

```yaml
- model_name: claude-3-7-sonnet
  litellm_params:
    model: databricks/databricks-claude-3-7-sonnet
    api_key: os.environ/DATABRICKS_API_KEY
    api_base: os.environ/DATABRICKS_API_BASE
```

2. Start proxy

```bash
litellm --config /path/to/config.yaml
```

3. Test it!

```bash
curl http://0.0.0.0:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer <YOUR-LITELLM-KEY>" \
  -d '{
    "model": "claude-3-7-sonnet",
    "messages": [{"role": "user", "content": "What is the capital of France?"}],
    "reasoning_effort": "low"
  }'
```

</TabItem>
</Tabs>

**Expected Response**

```python
ModelResponse(
    id='chatcmpl-c542d76d-f675-4e87-8e5f-05855f5d0f5e',
    created=1740470510,
    model='claude-3-7-sonnet-20250219',
    object='chat.completion',
    system_fingerprint=None,
    choices=[
        Choices(
            finish_reason='stop',
            index=0,
            message=Message(
                content="The capital of France is Paris.",
                role='assistant',
                tool_calls=None,
                function_call=None,
                provider_specific_fields={
                    'citations': None,
                    'thinking_blocks': [
                        {
                            'type': 'thinking',
                            'thinking': 'The capital of France is Paris. This is a very straightforward factual question.',
                            'signature': 'EuYBCkQYAiJAy6...'
                        }
                    ]
                }
            ),
            thinking_blocks=[
                {
                    'type': 'thinking',
                    'thinking': 'The capital of France is Paris. This is a very straightforward factual question.',
                    'signature': 'EuYBCkQYAiJAy6AGB...'
                }
            ],
            reasoning_content='The capital of France is Paris. This is a very straightforward factual question.'
        )
    ],
    usage=Usage(
        completion_tokens=68,
        prompt_tokens=42,
        total_tokens=110,
        completion_tokens_details=None,
        prompt_tokens_details=PromptTokensDetailsWrapper(
            audio_tokens=None,
            cached_tokens=0,
            text_tokens=None,
            image_tokens=None
        ),
        cache_creation_input_tokens=0,
        cache_read_input_tokens=0
    )
)
```
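
To read the reasoning back out programmatically, something like the following should work (a sketch; the accessors are assumed from the fields shown in the dump above):

```python
message = resp.choices[0].message
print(message.content)             # "The capital of France is Paris."
print(message.reasoning_content)   # assumed accessor for the reasoning summary
print(message.thinking_blocks)     # assumed accessor for the raw thinking blocks
```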

### Pass `thinking` to Anthropic models

You can also pass the `thinking` parameter to Anthropic models.

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import completion
import os

# set ENV variables (can also be passed in to .completion() - e.g. `api_base`, `api_key`)
os.environ["DATABRICKS_API_KEY"] = "databricks key"
os.environ["DATABRICKS_API_BASE"] = "databricks base url"

response = completion(
    model="databricks/databricks-claude-3-7-sonnet",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    thinking={"type": "enabled", "budget_tokens": 1024},
)
```

</TabItem>
<TabItem value="proxy" label="PROXY">

```bash
curl http://0.0.0.0:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $LITELLM_KEY" \
  -d '{
    "model": "databricks/databricks-claude-3-7-sonnet",
    "messages": [{"role": "user", "content": "What is the capital of France?"}],
    "thinking": {"type": "enabled", "budget_tokens": 1024}
  }'
```

</TabItem>
</Tabs>

## Supported Databricks Chat Completion Models

:::tip

**We support ALL Databricks models, just set `model=databricks/<any-model-on-databricks>` as a prefix when sending litellm requests**

:::

| Model Name | Command |
|----------------------------|------------------------------------------------------------------|
| databricks/databricks-claude-3-7-sonnet | `completion(model='databricks/databricks-claude-3-7-sonnet', messages=messages)` |
| databricks-meta-llama-3-1-70b-instruct | `completion(model='databricks/databricks-meta-llama-3-1-70b-instruct', messages=messages)` |
| databricks-meta-llama-3-1-405b-instruct | `completion(model='databricks/databricks-meta-llama-3-1-405b-instruct', messages=messages)` |
| databricks-dbrx-instruct | `completion(model='databricks/databricks-dbrx-instruct', messages=messages)` |
| databricks-meta-llama-3-70b-instruct | `completion(model='databricks/databricks-meta-llama-3-70b-instruct', messages=messages)` |
| databricks-llama-2-70b-chat | `completion(model='databricks/databricks-llama-2-70b-chat', messages=messages)` |
| databricks-mixtral-8x7b-instruct | `completion(model='databricks/databricks-mixtral-8x7b-instruct', messages=messages)` |
| databricks-mpt-30b-instruct | `completion(model='databricks/databricks-mpt-30b-instruct', messages=messages)` |
| databricks-mpt-7b-instruct | `completion(model='databricks/databricks-mpt-7b-instruct', messages=messages)` |

## Embedding Models

### Passing Databricks specific params - 'instruction'

For embedding models, databricks lets you pass in an additional param 'instruction'. [Full Spec](https://github.com/BerriAI/litellm/blob/43353c28b341df0d9992b45c6ce464222ebd7984/litellm/llms/databricks.py#L164)

@@ -187,27 +385,6 @@ response = litellm.embedding(

    instruction: "Represent this sentence for searching relevant passages:"
```

## Supported Databricks Embedding Models

:::tip

@@ -887,3 +887,54 @@ response = await client.chat.completions.create(

</TabItem>
</Tabs>

## Image Generation

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import completion

response = completion(
    model="gemini/gemini-2.0-flash-exp-image-generation",
    messages=[{"role": "user", "content": "Generate an image of a cat"}],
    modalities=["image", "text"],
)
assert response.choices[0].message.content is not None # "data:image/png;base64,e4rr.."
```
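
The returned content is a `data:image/png;base64,...` URL. A small sketch (assuming that data-URL shape) for decoding it to a file:

```python
import base64

# The message content is a data URL: "data:image/png;base64,<payload>"
data_url = response.choices[0].message.content
b64_payload = data_url.split("base64,", 1)[1]

with open("cat.png", "wb") as f:
    f.write(base64.b64decode(b64_payload))
```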

</TabItem>
<TabItem value="proxy" label="PROXY">

1. Setup config.yaml

```yaml
model_list:
  - model_name: gemini-2.0-flash-exp-image-generation
    litellm_params:
      model: gemini/gemini-2.0-flash-exp-image-generation
      api_key: os.environ/GEMINI_API_KEY
```

2. Start proxy

```bash
litellm --config /path/to/config.yaml
```

3. Test it!

```bash
curl -L -X POST 'http://localhost:4000/v1/chat/completions' \
  -H 'Content-Type: application/json' \
  -H 'Authorization: Bearer sk-1234' \
  -d '{
    "model": "gemini-2.0-flash-exp-image-generation",
    "messages": [{"role": "user", "content": "Generate an image of a cat"}],
    "modalities": ["image", "text"]
  }'
```

</TabItem>
</Tabs>

docs/my-website/docs/providers/google_ai_studio/files.md (new file, 161 lines)

@@ -0,0 +1,161 @@

import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# [BETA] Google AI Studio (Gemini) Files API

Use this to upload files to Google AI Studio (Gemini).

Useful to pass in large media files to Gemini's `/generateContent` endpoint.

| Action | Supported |
|----------|-----------|
| `create` | Yes |
| `delete` | No |
| `retrieve` | No |
| `list` | No |

## Usage

<Tabs>
<TabItem value="sdk" label="SDK">

```python
import base64
import requests
from litellm import completion, create_file
import os


### UPLOAD FILE ###

# Fetch the audio file and convert it to a base64 encoded string
url = "https://cdn.openai.com/API/docs/audio/alloy.wav"
response = requests.get(url)
response.raise_for_status()
wav_data = response.content
encoded_string = base64.b64encode(wav_data).decode('utf-8')


file = create_file(
    file=wav_data,
    purpose="user_data",
    extra_body={"custom_llm_provider": "gemini"},
    api_key=os.getenv("GEMINI_API_KEY"),
)

print(f"file: {file}")

assert file is not None


### GENERATE CONTENT ###
response = completion(
    model="gemini-2.0-flash",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What is in this recording?"
                },
                {
                    "type": "file",
                    "file": {
                        "file_id": file.id,
                        "filename": "my-test-name",
                        "format": "audio/wav"
                    }
                }
            ]
        },
    ]
)

print(response.choices[0].message)
```

</TabItem>
<TabItem value="proxy" label="PROXY">

1. Setup config.yaml

```yaml
model_list:
  - model_name: "gemini-2.0-flash"
    litellm_params:
      model: gemini/gemini-2.0-flash
      api_key: os.environ/GEMINI_API_KEY
```

2. Start proxy

```bash
litellm --config config.yaml
```

3. Test it

```python
import base64
import requests
from openai import OpenAI

client = OpenAI(
    base_url="http://0.0.0.0:4000",
    api_key="sk-1234"
)

# Fetch the audio file and convert it to a base64 encoded string
url = "https://cdn.openai.com/API/docs/audio/alloy.wav"
response = requests.get(url)
response.raise_for_status()
wav_data = response.content
encoded_string = base64.b64encode(wav_data).decode('utf-8')


file = client.files.create(
    file=wav_data,
    purpose="user_data",
    extra_body={"target_model_names": "gemini-2.0-flash"}
)

print(f"file: {file}")

assert file is not None

completion = client.chat.completions.create(
    model="gemini-2.0-flash",
    modalities=["text", "audio"],
    audio={"voice": "alloy", "format": "wav"},
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "What is in this recording?"
                },
                {
                    "type": "file",
                    "file": {
                        "file_id": file.id,
                        "filename": "my-test-name",
                        "format": "audio/wav"
                    }
                }
            ]
        },
    ],
    extra_body={"drop_params": True}
)

print(completion.choices[0].message)
```

</TabItem>
</Tabs>

@@ -6,6 +6,8 @@ import Image from '@theme/IdealImage';

Track spend for keys, users, and teams across 100+ LLMs.

LiteLLM automatically tracks spend for all known models. See our [model cost map](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json)
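
For SDK calls you can compute the same cost-map price locally with `litellm.completion_cost`; a quick sketch (the model name is just an example):

```python
from litellm import completion, completion_cost

response = completion(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)

# completion_cost looks up per-token prices in the model cost map
cost = completion_cost(completion_response=response)
print(f"cost: ${cost:.6f}")
```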

### How to Track Spend with LiteLLM

**Step 1**

@@ -35,10 +37,10 @@ response = client.chat.completions.create(

            "content": "this is a test request, write a short poem"
        }
    ],
    user="palantir", # OPTIONAL: pass user to track spend by user
    extra_body={
        "metadata": {
            "tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"] # ENTERPRISE: pass tags to track spend by tags
        }
    }
)

@@ -63,9 +65,9 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \

            "content": "what llm are you"
        }
    ],
    "user": "palantir", # OPTIONAL: pass user to track spend by user
    "metadata": {
        "tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"] # ENTERPRISE: pass tags to track spend by tags
    }
}'
```

@@ -90,7 +92,7 @@ chat = ChatOpenAI(

    user="palantir",
    extra_body={
        "metadata": {
            "tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"] # ENTERPRISE: pass tags to track spend by tags
        }
    }
)

@@ -150,8 +152,134 @@ Navigate to the Usage Tab on the LiteLLM UI (found on https://your-proxy-endpoin

</TabItem>
</Tabs>

### Allowing Non-Proxy Admins to access `/spend` endpoints

Use this when you want non-proxy admins to access `/spend` endpoints

:::info

Schedule a [meeting with us to get your Enterprise License](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)

:::

##### Create Key

Create a Key with `permissions={"get_spend_routes": true}`

```shell
curl --location 'http://0.0.0.0:4000/key/generate' \
  --header 'Authorization: Bearer sk-1234' \
  --header 'Content-Type: application/json' \
  --data '{
    "permissions": {"get_spend_routes": true}
  }'
```

##### Use generated key on `/spend` endpoints

Access spend routes with the newly generated key

```shell
curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30' \
  -H 'Authorization: Bearer sk-H16BKvrSNConSsBYLGc_7A'
```

#### Reset Team, API Key Spend - MASTER KEY ONLY

Use `/global/spend/reset` if you want to:
- Reset the Spend for all API Keys and Teams. The `spend` for ALL Teams and Keys in `LiteLLM_TeamTable` and `LiteLLM_VerificationToken` will be set to `spend=0`
- LiteLLM will maintain all the logs in `LiteLLMSpendLogs` for Auditing Purposes

##### Request

Only the `LITELLM_MASTER_KEY` you set can access this route

```shell
curl -X POST \
  'http://localhost:4000/global/spend/reset' \
  -H 'Authorization: Bearer sk-1234' \
  -H 'Content-Type: application/json'
```

##### Expected Responses

```shell
{"message":"Spend for all API Keys and Teams reset successfully","status":"success"}
```

## Set 'base_model' for Cost Tracking (e.g. Azure deployments)

**Problem**: Azure returns `gpt-4` in the response when `azure/gpt-4-1106-preview` is used. This leads to inaccurate cost tracking.

**Solution** ✅: Set `base_model` on your config so litellm uses the correct model for calculating azure cost

Get the base model name from [here](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json)

Example config with `base_model`

```yaml
model_list:
  - model_name: azure-gpt-3.5
    litellm_params:
      model: azure/chatgpt-v-2
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
      api_version: "2023-07-01-preview"
    model_info:
      base_model: azure/gpt-4-1106-preview
```

## Daily Spend Breakdown API

Retrieve granular daily usage data for a user (by model, provider, and API key) with a single endpoint.

Example Request:

```shell title="Daily Spend Breakdown API" showLineNumbers
curl -L -X GET 'http://localhost:4000/user/daily/activity?start_date=2025-03-20&end_date=2025-03-27' \
  -H 'Authorization: Bearer sk-...'
```

```json title="Daily Spend Breakdown API Response" showLineNumbers
{
    "results": [
        {
            "date": "2025-03-27",
            "metrics": {
                "spend": 0.0177072,
                "prompt_tokens": 111,
                "completion_tokens": 1711,
                "total_tokens": 1822,
                "api_requests": 11
            },
            "breakdown": {
                "models": {
                    "gpt-4o-mini": {
                        "spend": 1.095e-05,
                        "prompt_tokens": 37,
                        "completion_tokens": 9,
                        "total_tokens": 46,
                        "api_requests": 1
                    }
                },
                "providers": { "openai": { ... }, "azure_ai": { ... } },
                "api_keys": { "3126b6eaf1...": { ... } }
            }
        }
    ],
    "metadata": {
        "total_spend": 0.7274667,
        "total_prompt_tokens": 280990,
        "total_completion_tokens": 376674,
        "total_api_requests": 14
    }
}
```

### API Reference

See our [Swagger API](https://litellm-api.up.railway.app/#/Budget%20%26%20Spend%20Tracking/get_user_daily_activity_user_daily_activity_get) for more details on the `/user/daily/activity` endpoint

## ✨ (Enterprise) Generate Spend Reports

Use this to charge other teams, customers, users

Use the `/global/spend/report` endpoint to get spend reports

@@ -470,105 +598,6 @@ curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end

</Tabs>

## ✨ Custom Spend Log metadata

@@ -587,4 +616,5 @@ Logging specific key,value pairs in spend logs metadata is an enterprise feature

Tracking spend with Custom tags is an enterprise feature. [See here](./enterprise.md#tracking-spend-for-custom-tags)

:::
@@ -140,7 +140,7 @@ The above request should not be blocked, and you should receive a regular LLM re

</Tabs>

## Advanced

Aim Guard provides user-specific Guardrail policies, enabling you to apply tailored policies to individual users.
To utilize this feature, include the end-user's email in the request payload by setting the `x-aim-user-email` header of your request.
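
For example, with an OpenAI-compatible client pointed at the proxy (a sketch; the model name and key are placeholders):

```python
from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")

# Attach the end-user's email so Aim applies that user's guardrail policy
response = client.chat.completions.create(
    model="gpt-3.5-turbo",  # placeholder model name on your proxy
    messages=[{"role": "user", "content": "hi"}],
    extra_headers={"x-aim-user-email": "user@example.com"},
)
```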
@@ -177,6 +177,50 @@ export LITELLM_SALT_KEY="sk-1234"

[**See Code**](https://github.com/BerriAI/litellm/blob/036a6821d588bd36d170713dcf5a72791a694178/litellm/proxy/common_utils/encrypt_decrypt_utils.py#L15)

## 9. Use `prisma migrate deploy`

Use this to handle db migrations across LiteLLM versions in production

<Tabs>
<TabItem value="env" label="ENV">

```bash
USE_PRISMA_MIGRATE="True"
```

</TabItem>

<TabItem value="cli" label="CLI">

```bash
litellm --use_prisma_migrate
```

</TabItem>
</Tabs>

Benefits:

The migrate deploy command:

- **Does not** issue a warning if an already applied migration is missing from migration history
- **Does not** detect drift (production database schema differs from migration history end state - for example, due to a hotfix)
- **Does not** reset the database or generate artifacts (such as Prisma Client)
- **Does not** rely on a shadow database

### How does LiteLLM handle DB migrations in production?

1. A new migration file is written to our `litellm-proxy-extras` package. [See all](https://github.com/BerriAI/litellm/tree/main/litellm-proxy-extras/litellm_proxy_extras/migrations)

2. The core litellm pip package is bumped to point to the new `litellm-proxy-extras` package. This ensures older versions of LiteLLM will continue to use the old migrations. [See code](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/pyproject.toml#L58)

3. When you upgrade to a new version of LiteLLM, the migration file is applied to the database. [See code](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/litellm-proxy-extras/litellm_proxy_extras/utils.py#L42)

## Extras

### Expected Performance in Production
BIN docs/my-website/img/release_notes/new_activity_tab.png (new binary file, 326 KiB; not shown)

The same author front-matter update (refreshing Krrish Dholakia's image_url token) is applied identically across the release-note files in this commit:

@@ -6,7 +6,7 @@ authors:

  - name: Krrish Dholakia
    title: CEO, LiteLLM
    url: https://www.linkedin.com/in/krish-d/
    image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
  - name: Ishaan Jaffer
    title: CTO, LiteLLM
    url: https://www.linkedin.com/in/reffajnaahsi/
@@ -39,4 +39,102 @@ ghcr.io/berriai/litellm:main-v1.65.4-stable

pip install litellm==1.65.4.post1
```

</TabItem>
</Tabs>

## New Models / Updated Models

1. Databricks - claude-3-7-sonnet cost tracking [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L10350)
2. VertexAI - `gemini-2.5-pro-exp-03-25` cost tracking [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L4492)
3. VertexAI - `gemini-2.0-flash` cost tracking [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L4689)
4. Groq - add whisper ASR models to model cost map [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L3324)
5. IBM - Add watsonx/ibm/granite-3-8b-instruct to model cost map [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L91)
6. Google AI Studio - add gemini/gemini-2.5-pro-preview-03-25 to model cost map [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L4850)

## LLM Translation

1. Vertex AI - Support anyOf param for OpenAI json schema translation [Get Started](https://docs.litellm.ai/docs/providers/vertex#json-schema)
2. Anthropic - response_format + thinking param support (works across Anthropic API, Bedrock, Vertex) [Get Started](https://docs.litellm.ai/docs/reasoning_content)
3. Anthropic - if a thinking token budget is specified and max tokens is not, ensure the max tokens sent to Anthropic is higher than the thinking tokens (works across Anthropic API, Bedrock, Vertex) [PR](https://github.com/BerriAI/litellm/pull/9594)
4. Bedrock - latency optimized inference support [Get Started](https://docs.litellm.ai/docs/providers/bedrock#usage---latency-optimized-inference)
5. Sagemaker - handle special tokens + multibyte character code in response [Get Started](https://docs.litellm.ai/docs/providers/aws_sagemaker)
6. MCP - add support for using SSE MCP servers [Get Started](https://docs.litellm.ai/docs/mcp#usage)
7. Anthropic - new `litellm.messages.create` interface for calling Anthropic `/v1/messages` via passthrough (see the sketch after this list) [Get Started](https://docs.litellm.ai/docs/anthropic_unified#usage)
8. Anthropic - support 'file' content type in message param (works across Anthropic API, Bedrock, Vertex) [Get Started](https://docs.litellm.ai/docs/providers/anthropic#usage---pdf)
9. Anthropic - map openai 'reasoning_effort' to anthropic 'thinking' param (works across Anthropic API, Bedrock, Vertex) [Get Started](https://docs.litellm.ai/docs/providers/anthropic#usage---thinking--reasoning_content)
10. Google AI Studio (Gemini) - [BETA] `/v1/files` upload support [Get Started](../../docs/providers/google_ai_studio/files)
11. Azure - fix o-series tool calling [Get Started](../../docs/providers/azure#tool-calling--function-calling)
12. Unified file id - [ALPHA] allow calling multiple providers with same file id [PR](https://github.com/BerriAI/litellm/pull/9718)
    - This is experimental, and not recommended for production use.
    - We plan to have a production-ready implementation by next week.
13. Google AI Studio (Gemini) - return logprobs [PR](https://github.com/BerriAI/litellm/pull/9713)
14. Anthropic - Support prompt caching for Anthropic tool calls [Get Started](https://docs.litellm.ai/docs/completion/prompt_caching)
15. OpenRouter - unwrap extra body on open router calls [PR](https://github.com/BerriAI/litellm/pull/9747)
16. VertexAI - fix credential caching issue [PR](https://github.com/BerriAI/litellm/pull/9756)
17. XAI - filter out 'name' param for XAI [PR](https://github.com/BerriAI/litellm/pull/9761)
18. Gemini - image generation output support [Get Started](../../docs/providers/gemini#image-generation)
19. Databricks - support claude-3-7-sonnet w/ thinking + response_format [Get Started](../../docs/providers/databricks#usage---thinking--reasoning_content)
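
A minimal sketch of the new `litellm.messages.create` interface from item 7, assuming it mirrors Anthropic's `/v1/messages` arguments (see the linked docs for authoritative usage):

```python
import litellm

# Assumed usage: argument names follow Anthropic's /v1/messages spec;
# the model name is a placeholder.
response = litellm.messages.create(
    model="claude-3-7-sonnet-latest",
    max_tokens=100,
    messages=[{"role": "user", "content": "Hello, how are you?"}],
)
```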

## Spend Tracking Improvements

1. Reliability fix - Check sent and received model for cost calculation [PR](https://github.com/BerriAI/litellm/pull/9669)
2. Vertex AI - Multimodal embedding cost tracking [Get Started](https://docs.litellm.ai/docs/providers/vertex#multi-modal-embeddings), [PR](https://github.com/BerriAI/litellm/pull/9623)

## Management Endpoints / UI

<Image img={require('../../img/release_notes/new_activity_tab.png')} />

1. New Usage Tab
    - Report 'total_tokens' + report success/failure calls
    - Remove double bars on scroll
    - Ensure 'daily spend' chart ordered from earliest to latest date
    - Show spend per model per day
    - Show key alias on usage tab
    - Allow non-admins to view their activity
    - Add date picker to new usage tab
2. Virtual Keys Tab
    - Remove 'default key' on user signup
    - Fix showing user models available for personal key creation
3. Test Key Tab
    - Allow testing image generation models
4. Models Tab
    - Fix bulk adding models
    - Support reusable credentials for passthrough endpoints
    - Allow team members to see team models
5. Teams Tab
    - Fix json serialization error on update team metadata
6. Request Logs Tab
    - Add reasoning_content token tracking across all providers on streaming
7. API
    - Return key alias on /user/daily/activity [Get Started](../../docs/proxy/cost_tracking#daily-spend-breakdown-api)
8. SSO
    - Allow assigning SSO users to teams on MSFT SSO [PR](https://github.com/BerriAI/litellm/pull/9745)

## Logging / Guardrail Integrations

1. Console Logs - Add json formatting for uncaught exceptions [PR](https://github.com/BerriAI/litellm/pull/9619)
2. Guardrails - AIM Guardrails support for virtual key based policies [Get Started](../../docs/proxy/guardrails/aim_security)
3. Logging - fix completion start time tracking [PR](https://github.com/BerriAI/litellm/pull/9688)
4. Prometheus
    - Allow adding authentication on Prometheus /metrics endpoints [PR](https://github.com/BerriAI/litellm/pull/9766)
    - Distinguish LLM Provider Exception vs. LiteLLM Exception in metric naming [PR](https://github.com/BerriAI/litellm/pull/9760)
    - Emit operational metrics for new DB Transaction architecture [PR](https://github.com/BerriAI/litellm/pull/9719)

## Performance / Loadbalancing / Reliability improvements

1. Preventing Deadlocks
    - Reduce DB Deadlocks by storing spend updates in Redis and then committing to DB [PR](https://github.com/BerriAI/litellm/pull/9608)
    - Ensure no deadlocks occur when updating DailyUserSpendTransaction [PR](https://github.com/BerriAI/litellm/pull/9690)
    - High Traffic fix - ensure new DB + Redis architecture accurately tracks spend [PR](https://github.com/BerriAI/litellm/pull/9673)
    - Use Redis for PodLock Manager instead of PG (ensures no deadlocks occur) [PR](https://github.com/BerriAI/litellm/pull/9715)
    - v2 DB Deadlock Reduction Architecture - Add Max Size for In-Memory Queue + Backpressure Mechanism [PR](https://github.com/BerriAI/litellm/pull/9759)
2. Prisma Migrations [Get Started](../../docs/proxy/prod#9-use-prisma-migrate-deploy)
    - Connects litellm proxy to litellm's prisma migration files
    - Handle db schema updates from new `litellm-proxy-extras` sdk
3. Redis - support password for sync sentinel clients [PR](https://github.com/BerriAI/litellm/pull/9622)
4. Fix "Circular reference detected" error when max_parallel_requests = 0 [PR](https://github.com/BerriAI/litellm/pull/9671)
5. Code QA - Ban hardcoded numbers [PR](https://github.com/BerriAI/litellm/pull/9709)

## Helm

1. Fix: wrong indentation of ttlSecondsAfterFinished in chart [PR](https://github.com/BerriAI/litellm/pull/9611)

## General Proxy Improvements

1. Fix - only apply service_account_settings.enforced_params on service accounts [PR](https://github.com/BerriAI/litellm/pull/9683)
2. Fix - handle metadata null on `/chat/completion` [PR](https://github.com/BerriAI/litellm/issues/9717)
3. Fix - Move daily user transaction logging outside of 'disable_spend_logs' flag, as they're unrelated [PR](https://github.com/BerriAI/litellm/pull/9772)
@@ -188,7 +188,15 @@ const sidebars = {

            "providers/azure_ai",
            "providers/aiml",
            "providers/vertex",
            {
              type: "category",
              label: "Google AI Studio",
              items: [
                "providers/gemini",
                "providers/google_ai_studio/files",
              ]
            },
            "providers/anthropic",
            "providers/aws_sagemaker",
            "providers/bedrock",
File diff suppressed because one or more lines are too long

@@ -29,6 +29,10 @@ model_list:

      model: databricks/databricks-claude-3-7-sonnet
      api_key: os.environ/DATABRICKS_API_KEY
      api_base: os.environ/DATABRICKS_API_BASE
  - model_name: "gemini/gemini-2.0-flash"
    litellm_params:
      model: gemini/gemini-2.0-flash
      api_key: os.environ/GEMINI_API_KEY

litellm_settings:
  num_retries: 0