Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 03:04:13 +00:00)

Merge pull request #9475 from BerriAI/litellm_dev_03_22_2025_release_note
Litellm dev 03 22 2025 release note

Commit ff8b85f6c4: 15 changed files with 682 additions and 140 deletions
@@ -200,3 +200,92 @@ Expected Response

</TabItem>
</Tabs>


## OpenAI 'file' message type

This is currently only supported for OpenAI models.

This will be supported for all providers soon.

<Tabs>
<TabItem value="sdk" label="SDK">

```python
import base64
from litellm import completion

with open("draconomicon.pdf", "rb") as f:
    data = f.read()

base64_string = base64.b64encode(data).decode("utf-8")

response = completion(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "file",
                    "file": {
                        "filename": "draconomicon.pdf",
                        "file_data": f"data:application/pdf;base64,{base64_string}",
                    }
                },
                {
                    "type": "text",
                    "text": "What is the first dragon in the book?",
                }
            ],
        },
    ],
)

print(response.choices[0].message.content)
```

</TabItem>

<TabItem value="proxy" label="PROXY">

1. Setup config.yaml

```yaml
model_list:
  - model_name: openai-model
    litellm_params:
      model: gpt-4o
      api_key: os.environ/OPENAI_API_KEY
```

2. Start the proxy

```bash
litellm --config config.yaml
```

3. Test it!

```bash
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
    "model": "openai-model",
    "messages": [
        {"role": "user", "content": [
            {
                "type": "file",
                "file": {
                    "filename": "draconomicon.pdf",
                    "file_data": "data:application/pdf;base64,<BASE64_STRING>"
                }
            }
        ]}
    ]
}'
```

</TabItem>
</Tabs>
docs/my-website/docs/guides/security_settings.md (new file, 66 additions)

@@ -0,0 +1,66 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# SSL Security Settings

If you're in an environment using an older TLS bundle, with older encryption, follow this guide.

LiteLLM uses HTTPX for network requests, unless otherwise specified.

1. Disable SSL verification

<Tabs>
<TabItem value="sdk" label="SDK">

```python
import litellm
litellm.ssl_verify = False
```
</TabItem>
<TabItem value="proxy" label="PROXY">

```yaml
litellm_settings:
  ssl_verify: false
```

</TabItem>
<TabItem value="env_var" label="Environment Variables">

```bash
export SSL_VERIFY="False"
```
</TabItem>
</Tabs>

2. Lower security settings

<Tabs>
<TabItem value="sdk" label="SDK">

```python
import litellm
litellm.ssl_security_level = 1
litellm.ssl_certificate = "/path/to/certificate.pem"
```
</TabItem>
<TabItem value="proxy" label="PROXY">

```yaml
litellm_settings:
  ssl_security_level: 1
  ssl_certificate: "/path/to/certificate.pem"
```

</TabItem>
<TabItem value="env_var" label="Environment Variables">

```bash
export SSL_SECURITY_LEVEL="1"
export SSL_CERTIFICATE="/path/to/certificate.pem"
```
</TabItem>
</Tabs>
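Since LiteLLM uses HTTPX for its requests, the settings above come down to how the underlying HTTPX client is built. Here is a minimal sketch (an illustration, not part of the doc above) of what a lowered-security client looks like when constructed directly with `ssl` and `httpx`; the certificate path reuses the placeholder from the examples, and mapping `ssl_security_level: 1` to OpenSSL's `SECLEVEL=1` is an assumption:

```python
import ssl

import httpx

# Assumption: a security level of 1 corresponds to OpenSSL's "SECLEVEL=1",
# which re-enables older ciphers/key sizes that modern defaults reject.
ctx = ssl.create_default_context(cafile="/path/to/certificate.pem")
ctx.set_ciphers("DEFAULT@SECLEVEL=1")

# Requests made through this client will accept the older TLS setups
# described in this guide.
client = httpx.Client(verify=ctx)
print(client.get("https://example.com").status_code)
```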
@@ -1,4 +1,7 @@
import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Arize AI

@@ -11,6 +14,8 @@ https://github.com/BerriAI/litellm

:::

<Image img={require('../../img/arize.png')} />


## Pre-Requisites

@@ -24,7 +29,9 @@ You can also use the instrumentor option instead of the callback, which you can
```python
litellm.callbacks = ["arize"]
```

```python
import litellm
import os

@@ -48,7 +55,7 @@ response = litellm.completion(

### Using with LiteLLM Proxy

1. Setup config.yaml
```yaml
model_list:
  - model_name: gpt-4
@@ -60,13 +67,134 @@ model_list:
litellm_settings:
  callbacks: ["arize"]

general_settings:
  master_key: "sk-1234" # can also be set as an environment variable

environment_variables:
  ARIZE_SPACE_KEY: "d0*****"
  ARIZE_API_KEY: "141a****"
  ARIZE_ENDPOINT: "https://otlp.arize.com/v1" # OPTIONAL - your custom arize GRPC api endpoint
  ARIZE_HTTP_ENDPOINT: "https://otlp.arize.com/v1" # OPTIONAL - your custom arize HTTP api endpoint. Set either this or ARIZE_ENDPOINT or Neither (defaults to https://otlp.arize.com/v1 on grpc)
```

2. Start the proxy

```bash
litellm --config config.yaml
```

3. Test it!

```bash
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{ "model": "gpt-4", "messages": [{"role": "user", "content": "Hi 👋 - i am openai"}]}'
```

## Pass Arize Space/Key per-request

Supported parameters:
- `arize_api_key`
- `arize_space_key`

<Tabs>
<TabItem value="sdk" label="SDK">

```python
import litellm
import os

# LLM API Keys
os.environ['OPENAI_API_KEY']=""

# set arize as a callback, litellm will send the data to arize
litellm.callbacks = ["arize"]

# openai call
response = litellm.completion(
  model="gpt-3.5-turbo",
  messages=[
    {"role": "user", "content": "Hi 👋 - i'm openai"}
  ],
  arize_api_key=os.getenv("ARIZE_SPACE_2_API_KEY"),
  arize_space_key=os.getenv("ARIZE_SPACE_2_KEY"),
)
```

</TabItem>
<TabItem value="proxy" label="PROXY">

1. Setup config.yaml

```yaml
model_list:
  - model_name: gpt-4
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

litellm_settings:
  callbacks: ["arize"]

general_settings:
  master_key: "sk-1234" # can also be set as an environment variable
```

2. Start the proxy

```bash
litellm --config /path/to/config.yaml
```

3. Test it!

<Tabs>
<TabItem value="curl" label="CURL">

```bash
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
  "model": "gpt-4",
  "messages": [{"role": "user", "content": "Hi 👋 - i am openai"}],
  "arize_api_key": "ARIZE_SPACE_2_API_KEY",
  "arize_space_key": "ARIZE_SPACE_2_KEY"
}'
```
</TabItem>
<TabItem value="openai_python" label="OpenAI Python">

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages = [
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={
        "arize_api_key": "ARIZE_SPACE_2_API_KEY",
        "arize_space_key": "ARIZE_SPACE_2_KEY"
    }
)

print(response)
```
</TabItem>
</Tabs>
</TabItem>
</Tabs>

## Support & Talk to Founders

- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
@@ -291,14 +291,15 @@ response = completion(
)
```

## O-Series Models

Azure OpenAI O-Series models are supported on LiteLLM.

LiteLLM routes any deployment name with `o1` or `o3` in the model name, to the O-Series [transformation](https://github.com/BerriAI/litellm/blob/91ed05df2962b8eee8492374b048d27cc144d08c/litellm/llms/azure/chat/o1_transformation.py#L4) logic.

To set this explicitly, set `model` to `azure/o_series/<your-deployment-name>`.

**Automatic Routing**

<Tabs>
<TabItem value="sdk" label="SDK">

@@ -306,60 +307,112 @@ Set `litellm.enable_preview_features = True` to use Azure O1 Models with streami

```python
import litellm

litellm.completion(model="azure/my-o3-deployment", messages=[{"role": "user", "content": "Hello, world!"}]) # 👈 Note: 'o3' in the deployment name
```
</TabItem>
<TabItem value="proxy" label="PROXY">

```yaml
model_list:
  - model_name: o3-mini
    litellm_params:
      model: azure/o3-model
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
```

</TabItem>
</Tabs>

**Explicit Routing**

<Tabs>
<TabItem value="sdk" label="SDK">

```python
import litellm

litellm.completion(model="azure/o_series/my-random-deployment-name", messages=[{"role": "user", "content": "Hello, world!"}]) # 👈 Note: 'o_series/' in the deployment name
```
</TabItem>
<TabItem value="proxy" label="PROXY">

```yaml
model_list:
  - model_name: o3-mini
    litellm_params:
      model: azure/o_series/my-random-deployment-name
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
```
</TabItem>
</Tabs>


## Azure Audio Model

<Tabs>
<TabItem value="sdk" label="SDK">

```python
from litellm import completion
import os

os.environ["AZURE_API_KEY"] = ""
os.environ["AZURE_API_BASE"] = ""
os.environ["AZURE_API_VERSION"] = ""

response = completion(
    model="azure/azure-openai-4o-audio",
    messages=[
        {
            "role": "user",
            "content": "I want to try out speech to speech"
        }
    ],
    modalities=["text","audio"],
    audio={"voice": "alloy", "format": "wav"}
)

print(response)
```
</TabItem>
<TabItem value="proxy" label="PROXY">

1. Setup config.yaml

```yaml
model_list:
  - model_name: azure-openai-4o-audio
    litellm_params:
      model: azure/azure-openai-4o-audio
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
      api_version: os.environ/AZURE_API_VERSION
```

2. Start proxy

```bash
litellm --config /path/to/config.yaml
```

3. Test it!

```bash
curl http://localhost:4000/v1/chat/completions \
  -H "Authorization: Bearer $LITELLM_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "azure-openai-4o-audio",
    "messages": [{"role": "user", "content": "I want to try out speech to speech"}],
    "modalities": ["text","audio"],
    "audio": {"voice": "alloy", "format": "wav"}
  }'
```

</TabItem>
</Tabs>
@@ -1428,10 +1428,14 @@ response = litellm.embedding(


## Supported AWS Bedrock Models

LiteLLM supports ALL Bedrock models.

Here's an example of using a bedrock model with LiteLLM. For a complete list, refer to the [model cost map](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json)

| Model Name | Command | Required OS Variables |
|----------------------------|------------------------------------------------------------------|------------------------------------------------------------------|
| Deepseek R1 | `completion(model='bedrock/us.deepseek.r1-v1:0', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']` |
| Anthropic Claude-V3.5 Sonnet | `completion(model='bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']` |
| Anthropic Claude-V3 sonnet | `completion(model='bedrock/anthropic.claude-3-sonnet-20240229-v1:0', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']` |
| Anthropic Claude-V3 Haiku | `completion(model='bedrock/anthropic.claude-3-haiku-20240307-v1:0', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']` |
@@ -202,6 +202,67 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
</TabItem>
</Tabs>


## Using Ollama FIM on `/v1/completions`

LiteLLM supports calling Ollama's `/api/generate` endpoint on `/v1/completions` requests.

<Tabs>
<TabItem value="sdk" label="SDK">

```python
import litellm

litellm._turn_on_debug() # turn on debug to see the request
from litellm import completion

response = completion(
    model="ollama/llama3.1",
    prompt="Hello, world!",
    api_base="http://localhost:11434"
)
print(response)
```
</TabItem>
<TabItem value="proxy" label="PROXY">

1. Setup config.yaml

```yaml
model_list:
  - model_name: "llama3.1"
    litellm_params:
      model: "ollama/llama3.1"
      api_base: "http://localhost:11434"
```

2. Start proxy

```bash
litellm --config /path/to/config.yaml --detailed_debug

# RUNNING ON http://0.0.0.0:4000
```

3. Test it!

```python
from openai import OpenAI

client = OpenAI(
    api_key="anything", # 👈 PROXY KEY (can be anything, if master_key not set)
    base_url="http://0.0.0.0:4000" # 👈 PROXY BASE URL
)

response = client.completions.create(
    model="llama3.1",  # 👈 model_name from config.yaml
    prompt="Hello, world!",
)
print(response)
```
</TabItem>
</Tabs>

## Using ollama `api/chat`
In order to send ollama requests to `POST /api/chat` on your ollama server, set the model prefix to `ollama_chat`
@@ -228,6 +228,92 @@ response = completion(

```

## PDF File Parsing

OpenAI has a new `file` message type that allows you to pass in a PDF file and have it parsed into a structured output. [Read more](https://platform.openai.com/docs/guides/pdf-files?api-mode=chat&lang=python)

<Tabs>
<TabItem value="sdk" label="SDK">

```python
import base64
from litellm import completion

with open("draconomicon.pdf", "rb") as f:
    data = f.read()

base64_string = base64.b64encode(data).decode("utf-8")

response = completion(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "file",
                    "file": {
                        "filename": "draconomicon.pdf",
                        "file_data": f"data:application/pdf;base64,{base64_string}",
                    }
                },
                {
                    "type": "text",
                    "text": "What is the first dragon in the book?",
                }
            ],
        },
    ],
)

print(response.choices[0].message.content)
```

</TabItem>

<TabItem value="proxy" label="PROXY">

1. Setup config.yaml

```yaml
model_list:
  - model_name: openai-model
    litellm_params:
      model: gpt-4o
      api_key: os.environ/OPENAI_API_KEY
```

2. Start the proxy

```bash
litellm --config config.yaml
```

3. Test it!

```bash
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
    "model": "openai-model",
    "messages": [
        {"role": "user", "content": [
            {
                "type": "file",
                "file": {
                    "filename": "draconomicon.pdf",
                    "file_data": "data:application/pdf;base64,<BASE64_STRING>"
                }
            }
        ]}
    ]
}'
```

</TabItem>
</Tabs>

## OpenAI Fine Tuned Models

| Model Name | Function Call |
@@ -449,26 +535,6 @@ response = litellm.acompletion(
)
```

### Using Helicone Proxy with LiteLLM
```python
import os
import litellm
from litellm import completion

os.environ["OPENAI_API_KEY"] = ""

# os.environ["OPENAI_API_BASE"] = ""
litellm.api_base = "https://oai.hconeai.com/v1"
litellm.headers = {
    "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}",
    "Helicone-Cache-Enabled": "true",
}

messages = [{ "content": "Hello, how are you?","role": "user"}]

# openai call
response = completion("gpt-3.5-turbo", messages)
```

### Using OpenAI Proxy with LiteLLM
```python
@@ -10,9 +10,11 @@ LiteLLM supports all the text / chat / vision models from [OpenRouter](https://o
import os
from litellm import completion
os.environ["OPENROUTER_API_KEY"] = ""
os.environ["OPENROUTER_API_BASE"] = "" # [OPTIONAL] defaults to https://openrouter.ai/api/v1

os.environ["OR_SITE_URL"] = "" # [OPTIONAL]
os.environ["OR_APP_NAME"] = "" # [OPTIONAL]

response = completion(
    model="openrouter/google/palm-2-chat-bison",
@@ -147,6 +147,7 @@ general_settings:
|------|------|-------------|
| completion_model | string | The default model to use for completions when `model` is not specified in the request |
| disable_spend_logs | boolean | If true, turns off writing each transaction to the database |
| disable_spend_updates | boolean | If true, turns off all spend updates to the DB. Including key/user/team spend updates. |
| disable_master_key_return | boolean | If true, turns off returning master key on UI. (checked on '/user/info' endpoint) |
| disable_retry_on_max_parallel_request_limit_error | boolean | If true, turns off retries when max parallel request limit is reached |
| disable_reset_budget | boolean | If true, turns off reset budget scheduled task |
@@ -79,6 +79,7 @@ Inherits from `StandardLoggingUserAPIKeyMetadata` and adds:
| `response_cost` | `Optional[str]` | Optional response cost |
| `additional_headers` | `Optional[StandardLoggingAdditionalHeaders]` | Additional headers |
| `batch_models` | `Optional[List[str]]` | Only set for Batches API. Lists the models used for cost calculation |
| `litellm_model_name` | `Optional[str]` | Model name sent in request |

## StandardLoggingModelInformation
@@ -43,19 +43,19 @@ These headers are useful for clients to understand the current rate limit status
| `x-litellm-max-fallbacks` | int | Maximum number of fallback attempts allowed |

## Cost Tracking Headers
| Header | Type | Description | Available on Pass-Through Endpoints |
|--------|------|-------------|-------------|
| `x-litellm-response-cost` | float | Cost of the API call | |
| `x-litellm-key-spend` | float | Total spend for the API key | ✅ |

## LiteLLM Specific Headers
| Header | Type | Description | Available on Pass-Through Endpoints |
|--------|------|-------------|-------------|
| `x-litellm-call-id` | string | Unique identifier for the API call | ✅ |
| `x-litellm-model-id` | string | Unique identifier for the model used | |
| `x-litellm-model-api-base` | string | Base URL of the API endpoint | ✅ |
| `x-litellm-version` | string | Version of LiteLLM being used | |
| `x-litellm-model-group` | string | Model group identifier | |
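One quick way to see these headers on a running proxy (a sketch, assuming a local proxy on port 4000 and the example `sk-1234` key used elsewhere in these docs) is to have curl print only the response headers:

```bash
# -sD - dumps the response headers to stdout; -o /dev/null discards the body
curl -sD - -o /dev/null -X POST 'http://0.0.0.0:4000/chat/completions' \
  -H 'Content-Type: application/json' \
  -H 'Authorization: Bearer sk-1234' \
  -d '{"model": "gpt-4", "messages": [{"role": "user", "content": "hi"}]}'
```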

## Response headers from LLM providers
BIN  docs/my-website/img/arize.png (new file, 707 KiB, binary file not shown)
@@ -26,14 +26,6 @@ This release is primarily focused on:
- UI - Credential Management, re-use credentials when adding new models
- UI - Test Connection to LLM Provider before adding a model

:::info

This release will be live on 03/16/2025

:::

<!-- <Image img={require('../../img/release_notes/v16311_release.jpg')} /> -->

## Known Issues
- 🚨 Known issue on Azure OpenAI - We don't recommend upgrading if you use Azure OpenAI. This version failed our Azure OpenAI load test


docs/my-website/release_notes/v1.63.14/index.md (new file, 131 additions)
@@ -0,0 +1,131 @@
---
title: v1.63.14-stable
slug: v1.63.14-stable
date: 2025-03-22T10:00:00
authors:
  - name: Krrish Dholakia
    title: CEO, LiteLLM
    url: https://www.linkedin.com/in/krish-d/
    image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
  - name: Ishaan Jaffer
    title: CTO, LiteLLM
    url: https://www.linkedin.com/in/reffajnaahsi/
    image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg

tags: [credential management, thinking content, responses api, snowflake]
hide_table_of_contents: false
---

import Image from '@theme/IdealImage';

These are the changes since `v1.63.11-stable`.

This release brings:
- LLM Translation Improvements (MCP Support and Bedrock Application Profiles)
- Perf improvements for Usage-based Routing
- Streaming guardrail support via websockets

## Docker Run LiteLLM Proxy

```
docker run \
  -e STORE_MODEL_IN_DB=True \
  -p 4000:4000 \
  ghcr.io/berriai/litellm:main-v1.63.14-stable
```

## Demo Instance

Here's a Demo Instance to test changes:
- Instance: https://demo.litellm.ai/
- Login Credentials:
  - Username: admin
  - Password: sk-1234


## New Models / Updated Models

- Azure gpt-4o - fixed pricing to latest global pricing - [PR](https://github.com/BerriAI/litellm/pull/9361)
- O1-Pro - add pricing + model information - [PR](https://github.com/BerriAI/litellm/pull/9397)
- Azure AI - mistral 3.1 small pricing added - [PR](https://github.com/BerriAI/litellm/pull/9453)
- Azure - gpt-4.5-preview pricing added - [PR](https://github.com/BerriAI/litellm/pull/9453)


## LLM Translation

1. **New LLM Features**

- Bedrock: Support bedrock application inference profiles [Docs](https://docs.litellm.ai/docs/providers/bedrock#bedrock-application-inference-profile)
  - Infer aws region from bedrock application profile id - (`arn:aws:bedrock:us-east-1:...`)
- Ollama - support calling via `/v1/completions` [Get Started](../../docs/providers/ollama#using-ollama-fim-on-v1completions)
- Bedrock - support `us.deepseek.r1-v1:0` model name [Docs](../../docs/providers/bedrock#supported-aws-bedrock-models) (see the sketch at the end of this section)

https://github.com/BerriAI/litellm/pull/9363
- OpenRouter - `OPENROUTER_API_BASE` env var support [Docs](../../docs/providers/openrouter.md)
- Azure - add audio model parameter support - [Docs](../../docs/providers/azure#azure-audio-model)
- OpenAI - PDF File support [Docs](../../docs/completion/document_understanding#openai-file-message-type)
- OpenAI - o1-pro Responses API streaming support [Docs](../../docs/response_api.md#streaming)
- [BETA] MCP - Use MCP Tools with LiteLLM SDK [Docs](../../docs/mcp)

2. **Bug Fixes**

- Voyage: prompt token on embedding tracking fix - [PR](https://github.com/BerriAI/litellm/commit/56d3e75b330c3c3862dc6e1c51c1210e48f1068e)
- Streaming - Prevents final chunk w/ usage from being ignored (impacted bedrock streaming + cost tracking) - [PR](https://github.com/BerriAI/litellm/commit/dd2c980d5bb9e1a3b125e364c5d841751e67c96d)
- Sagemaker - Fix ‘Too little data for declared Content-Length’ error - [PR](https://github.com/BerriAI/litellm/pull/9326)
- OpenAI-compatible models - fix issue when calling openai-compatible models w/ custom_llm_provider set - [PR](https://github.com/BerriAI/litellm/pull/9355)
- VertexAI - Embedding ‘outputDimensionality’ support - [PR](https://github.com/BerriAI/litellm/commit/437dbe724620675295f298164a076cbd8019d304)
- Anthropic - return consistent json response format on streaming/non-streaming - [PR](https://github.com/BerriAI/litellm/pull/9437)
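As a concrete illustration of the new Bedrock model name mentioned above, here is a minimal sketch of calling Deepseek R1 through LiteLLM. The model string and the AWS access key variables come from the Bedrock provider docs; the `AWS_REGION_NAME` variable and the region value are assumptions for the sketch:

```python
import os

import litellm

# AWS credentials for Bedrock (values intentionally left blank)
os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = "us-east-1"  # example region, adjust as needed

# Newly supported Deepseek R1 model name on Bedrock
response = litellm.completion(
    model="bedrock/us.deepseek.r1-v1:0",
    messages=[{"role": "user", "content": "Hello from Bedrock"}],
)
print(response.choices[0].message.content)
```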

## Spend Tracking Improvements

- `litellm_proxy/` - support reading litellm response cost header from proxy, when using client sdk (see the sketch below)
- Reset Budget Job - fix budget reset error on keys/teams/users - [PR](https://github.com/BerriAI/litellm/pull/9329)
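For the response-cost header item above, here is a minimal client-side sketch (not taken from the release itself) of reading `x-litellm-response-cost` with the OpenAI SDK pointed at a LiteLLM proxy; the key and base URL are the example values used elsewhere in these docs:

```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# Use the raw-response wrapper so the proxy's HTTP headers are accessible.
raw = client.chat.completions.with_raw_response.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "hi"}],
)

# The LiteLLM proxy sets this header (see the response headers docs);
# parse it as a float for client-side spend tracking.
print(raw.headers.get("x-litellm-response-cost"))
print(raw.parse().choices[0].message.content)
```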
## UI

1. Users Page
   - Feature: Control default internal user settings [PR](https://github.com/BerriAI/litellm/pull/9374)
2. Icons:
   - Feature: Replace external "artificialanalysis.ai" icons by local svg [PR](https://github.com/BerriAI/litellm/pull/9374)
3. Sign In/Sign Out
   - Fix: Default login when `default_user_id` user does not exist in DB [PR](https://github.com/BerriAI/litellm/pull/9395)


## Logging Integrations

- Support post-call guardrails for streaming responses - https://github.com/BerriAI/litellm/commit/4a31b32a88b7729a032e58ab046079d17000087f [NEEDS DOCS]
- Arize [Get Started](../../docs/observability/arize_integration)
  - fix invalid package import [PR](https://github.com/BerriAI/litellm/pull/9338)
  - migrate to using standardloggingpayload for metadata, ensures spans land successfully [PR](https://github.com/BerriAI/litellm/pull/9338)
  - fix logging to just log the LLM I/O [PR](https://github.com/BerriAI/litellm/pull/9353)
  - Dynamic API Key/Space param support [Get Started](../../docs/observability/arize_integration#pass-arize-spacekey-per-request)
- StandardLoggingPayload - Log litellm_model_name in payload. Allows knowing what the model sent to API provider was [Get Started](../../docs/proxy/logging_spec#standardlogginghiddenparams)
- Prompt Management - Allow building custom prompt management integration [Get Started](../../docs/proxy/custom_prompt_management.md)

## Performance / Reliability improvements

- Redis Caching - add 5s default timeout, prevents hanging redis connection from impacting llm calls [PR](https://github.com/BerriAI/litellm/commit/db92956ae33ed4c4e3233d7e1b0c7229817159bf)
- Allow disabling all spend updates / writes to DB - patch to allow disabling all spend updates to DB with a flag [PR](https://github.com/BerriAI/litellm/pull/9331)
- Azure OpenAI - correctly re-use azure openai client, fixes perf issue from previous Stable release [PR](https://github.com/BerriAI/litellm/commit/f2026ef907c06d94440930917add71314b901413)
- Azure OpenAI - uses litellm.ssl_verify on Azure/OpenAI clients [PR](https://github.com/BerriAI/litellm/commit/f2026ef907c06d94440930917add71314b901413)
- Usage-based routing - Wildcard model support [Get Started](../../docs/proxy/usage_based_routing#wildcard-model-support)
- Usage-based routing - Support batch writing increments to redis - reduces latency to same as ‘simple-shuffle’ [PR](https://github.com/BerriAI/litellm/pull/9357)
- Router - show reason for model cooldown on ‘no healthy deployments available error’ [PR](https://github.com/BerriAI/litellm/pull/9438)
- Caching - add max value limit to an item in in-memory cache (1MB) - prevents OOM errors on large image url’s being sent through proxy [PR](https://github.com/BerriAI/litellm/pull/9448)


## General Improvements

- Passthrough Endpoints - support returning api-base on pass-through endpoints Response Headers [Docs](../../docs/proxy/response_headers#litellm-specific-headers)
- SSL - support reading ssl security level from env var - Allows user to specify lower security settings [Get Started](../../docs/guides/security_settings)
- Credentials - only poll Credentials table when `STORE_MODEL_IN_DB` is True [PR](https://github.com/BerriAI/litellm/pull/9376)
- Image URL Handling - new architecture doc on image url handling [Docs](../../docs/proxy/image_handling)
- OpenAI - bump to pip install "openai==1.68.2" [PR](https://github.com/BerriAI/litellm/commit/e85e3bc52a9de86ad85c3dbb12d87664ee567a5a)
- Gunicorn - security fix - bump gunicorn==23.0.0 [PR](https://github.com/BerriAI/litellm/commit/7e9fc92f5c7fea1e7294171cd3859d55384166eb)


## Complete Git Diff

[Here's the complete git diff](https://github.com/BerriAI/litellm/compare/v1.63.11-stable...v1.63.14.rc)
@@ -243,6 +243,7 @@ const sidebars = {
        "exception_mapping",
        "completion/provider_specific_params",
        "guides/finetuned_models",
        "guides/security_settings",
        "completion/audio",
        "completion/web_search",
        "completion/document_understanding",