diff --git a/docs/my-website/docs/completion/document_understanding.md b/docs/my-website/docs/completion/document_understanding.md
index 6719169aef..c101aa1aef 100644
--- a/docs/my-website/docs/completion/document_understanding.md
+++ b/docs/my-website/docs/completion/document_understanding.md
@@ -200,3 +200,92 @@ Expected Response
+
+
+## OpenAI 'file' message type
+
+This is currently only supported for OpenAI models.
+
+This will be supported for all providers soon.
+
+
+
+```python
+import base64
+from litellm import completion
+
+with open("draconomicon.pdf", "rb") as f:
+    data = f.read()
+
+base64_string = base64.b64encode(data).decode("utf-8")
+
+response = completion(
+    model="gpt-4o",
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "file",
+                    "file": {
+                        "filename": "draconomicon.pdf",
+                        "file_data": f"data:application/pdf;base64,{base64_string}",
+                    }
+                },
+                {
+                    "type": "text",
+                    "text": "What is the first dragon in the book?",
+                }
+            ],
+        },
+    ],
+)
+
+print(response.choices[0].message.content)
+```
+
+
+
+1. Setup config.yaml
+
+```yaml
+model_list:
+  - model_name: openai-model
+    litellm_params:
+      model: gpt-4o
+      api_key: os.environ/OPENAI_API_KEY
+```
+
+2. Start the proxy
+
+```bash
+litellm --config config.yaml
+```
+
+3. Test it!
+
+```bash
+curl -X POST 'http://0.0.0.0:4000/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-1234' \
+-d '{
+    "model": "openai-model",
+    "messages": [
+        {"role": "user", "content": [
+            {
+                "type": "file",
+                "file": {
+                    "filename": "draconomicon.pdf",
+                    "file_data": "data:application/pdf;base64,<BASE64_ENCODED_PDF>"
+                }
+            }
+        ]}
+    ]
+}'
+```
+
+
\ No newline at end of file
diff --git a/docs/my-website/docs/guides/security_settings.md b/docs/my-website/docs/guides/security_settings.md
new file mode 100644
index 0000000000..4dfeda2d70
--- /dev/null
+++ b/docs/my-website/docs/guides/security_settings.md
@@ -0,0 +1,66 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# SSL Security Settings
+
+If you're in an environment using an older TLS bundle with older encryption, follow this guide.
+
+LiteLLM uses HTTPX for network requests, unless otherwise specified.
+
+1. Disable SSL verification
+
+
+```python
+import litellm
+litellm.ssl_verify = False
+```
+
+
+```yaml
+litellm_settings:
+  ssl_verify: false
+```
+
+
+```bash
+export SSL_VERIFY="False"
+```
+
+
+2.
Lower security settings + + + + +```python +import litellm +litellm.ssl_security_level = 1 +litellm.ssl_certificate = "/path/to/certificate.pem" +``` + + + +```yaml +litellm_settings: + ssl_security_level: 1 + ssl_certificate: "/path/to/certificate.pem" +``` + + + +```bash +export SSL_SECURITY_LEVEL="1" +export SSL_CERTIFICATE="/path/to/certificate.pem" +``` + + + + diff --git a/docs/my-website/docs/observability/arize_integration.md b/docs/my-website/docs/observability/arize_integration.md index 1cd36a1111..a654a1b4de 100644 --- a/docs/my-website/docs/observability/arize_integration.md +++ b/docs/my-website/docs/observability/arize_integration.md @@ -1,4 +1,7 @@ + import Image from '@theme/IdealImage'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; # Arize AI @@ -11,6 +14,8 @@ https://github.com/BerriAI/litellm ::: + + ## Pre-Requisites @@ -24,7 +29,9 @@ You can also use the instrumentor option instead of the callback, which you can ```python litellm.callbacks = ["arize"] ``` + ```python + import litellm import os @@ -48,7 +55,7 @@ response = litellm.completion( ### Using with LiteLLM Proxy - +1. Setup config.yaml ```yaml model_list: - model_name: gpt-4 @@ -60,13 +67,134 @@ model_list: litellm_settings: callbacks: ["arize"] +general_settings: + master_key: "sk-1234" # can also be set as an environment variable + environment_variables: ARIZE_SPACE_KEY: "d0*****" ARIZE_API_KEY: "141a****" ARIZE_ENDPOINT: "https://otlp.arize.com/v1" # OPTIONAL - your custom arize GRPC api endpoint - ARIZE_HTTP_ENDPOINT: "https://otlp.arize.com/v1" # OPTIONAL - your custom arize HTTP api endpoint. Set either this or ARIZE_ENDPOINT + ARIZE_HTTP_ENDPOINT: "https://otlp.arize.com/v1" # OPTIONAL - your custom arize HTTP api endpoint. Set either this or ARIZE_ENDPOINT or Neither (defaults to https://otlp.arize.com/v1 on grpc) ``` +2. Start the proxy + +```bash +litellm --config config.yaml +``` + +3. Test it! + +```bash +curl -X POST 'http://0.0.0.0:4000/chat/completions' \ +-H 'Content-Type: application/json' \ +-H 'Authorization: Bearer sk-1234' \ +-d '{ "model": "gpt-4", "messages": [{"role": "user", "content": "Hi 👋 - i'm openai"}]}' +``` + +## Pass Arize Space/Key per-request + +Supported parameters: +- `arize_api_key` +- `arize_space_key` + + + + +```python +import litellm +import os + +# LLM API Keys +os.environ['OPENAI_API_KEY']="" + +# set arize as a callback, litellm will send the data to arize +litellm.callbacks = ["arize"] + +# openai call +response = litellm.completion( + model="gpt-3.5-turbo", + messages=[ + {"role": "user", "content": "Hi 👋 - i'm openai"} + ], + arize_api_key=os.getenv("ARIZE_SPACE_2_API_KEY"), + arize_space_key=os.getenv("ARIZE_SPACE_2_KEY"), +) +``` + + + + +1. Setup config.yaml +```yaml +model_list: + - model_name: gpt-4 + litellm_params: + model: openai/fake + api_key: fake-key + api_base: https://exampleopenaiendpoint-production.up.railway.app/ + +litellm_settings: + callbacks: ["arize"] + +general_settings: + master_key: "sk-1234" # can also be set as an environment variable +``` + +2. Start the proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! 
+ + + + +```bash +curl -X POST 'http://0.0.0.0:4000/chat/completions' \ +-H 'Content-Type: application/json' \ +-H 'Authorization: Bearer sk-1234' \ +-d '{ + "model": "gpt-4", + "messages": [{"role": "user", "content": "Hi 👋 - i'm openai"}], + "arize_api_key": "ARIZE_SPACE_2_API_KEY", + "arize_space_key": "ARIZE_SPACE_2_KEY" +}' +``` + + + +```python +import openai +client = openai.OpenAI( + api_key="anything", + base_url="http://0.0.0.0:4000" +) + +# request sent to model set on litellm proxy, `litellm --model` +response = client.chat.completions.create( + model="gpt-3.5-turbo", + messages = [ + { + "role": "user", + "content": "this is a test request, write a short poem" + } + ], + extra_body={ + "arize_api_key": "ARIZE_SPACE_2_API_KEY", + "arize_space_key": "ARIZE_SPACE_2_KEY" + } +) + +print(response) +``` + + + + + ## Support & Talk to Founders - [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) diff --git a/docs/my-website/docs/providers/azure.md b/docs/my-website/docs/providers/azure.md index 111738a449..2cfb92546a 100644 --- a/docs/my-website/docs/providers/azure.md +++ b/docs/my-website/docs/providers/azure.md @@ -291,14 +291,15 @@ response = completion( ) ``` -## Azure O1 Models +## O-Series Models -| Model Name | Function Call | -|---------------------|----------------------------------------------------| -| o1-mini | `response = completion(model="azure/", messages=messages)` | -| o1-preview | `response = completion(model="azure/", messages=messages)` | +Azure OpenAI O-Series models are supported on LiteLLM. -Set `litellm.enable_preview_features = True` to use Azure O1 Models with streaming support. +LiteLLM routes any deployment name with `o1` or `o3` in the model name, to the O-Series [transformation](https://github.com/BerriAI/litellm/blob/91ed05df2962b8eee8492374b048d27cc144d08c/litellm/llms/azure/chat/o1_transformation.py#L4) logic. + +To set this explicitly, set `model` to `azure/o_series/`. + +**Automatic Routing** @@ -306,60 +307,112 @@ Set `litellm.enable_preview_features = True` to use Azure O1 Models with streami ```python import litellm -litellm.enable_preview_features = True # 👈 KEY CHANGE - -response = litellm.completion( - model="azure/", - messages=[{"role": "user", "content": "What is the weather like in Boston?"}], - stream=True -) - -for chunk in response: - print(chunk) +litellm.completion(model="azure/my-o3-deployment", messages=[{"role": "user", "content": "Hello, world!"}]) # 👈 Note: 'o3' in the deployment name ``` - + -1. Setup config.yaml ```yaml model_list: - - model_name: o1-mini + - model_name: o3-mini litellm_params: - model: azure/o1-mini - api_base: "os.environ/AZURE_API_BASE" - api_key: "os.environ/AZURE_API_KEY" - api_version: "os.environ/AZURE_API_VERSION" - -litellm_settings: - enable_preview_features: true # 👈 KEY CHANGE + model: azure/o3-model + api_base: os.environ/AZURE_API_BASE + api_key: os.environ/AZURE_API_KEY ``` -2. 
Start proxy + + + +**Explicit Routing** + + + + +```python +import litellm + +litellm.completion(model="azure/o_series/my-random-deployment-name", messages=[{"role": "user", "content": "Hello, world!"}]) # 👈 Note: 'o_series/' in the deployment name +``` + + + +```yaml +model_list: + - model_name: o3-mini + litellm_params: + model: azure/o_series/my-random-deployment-name + api_base: os.environ/AZURE_API_BASE + api_key: os.environ/AZURE_API_KEY +``` + + + + +## Azure Audio Model + + + + +```python +from litellm import completion +import os + +os.environ["AZURE_API_KEY"] = "" +os.environ["AZURE_API_BASE"] = "" +os.environ["AZURE_API_VERSION"] = "" + +response = completion( + model="azure/azure-openai-4o-audio", + messages=[ + { + "role": "user", + "content": "I want to try out speech to speech" + } + ], + modalities=["text","audio"], + audio={"voice": "alloy", "format": "wav"} +) + +print(response) +``` + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: azure-openai-4o-audio + litellm_params: + model: azure/azure-openai-4o-audio + api_base: os.environ/AZURE_API_BASE + api_key: os.environ/AZURE_API_KEY + api_version: os.environ/AZURE_API_VERSION +``` + +2. Start proxy ```bash litellm --config /path/to/config.yaml ``` -3. Test it +3. Test it! -```python -import openai -client = openai.OpenAI( - api_key="anything", - base_url="http://0.0.0.0:4000" -) -response = client.chat.completions.create(model="o1-mini", messages = [ - { - "role": "user", - "content": "this is a test request, write a short poem" - } -], -stream=True) - -for chunk in response: - print(chunk) +```bash +curl http://localhost:4000/v1/chat/completions \ + -H "Authorization: Bearer $LITELLM_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "azure-openai-4o-audio", + "messages": [{"role": "user", "content": "I want to try out speech to speech"}], + "modalities": ["text","audio"], + "audio": {"voice": "alloy", "format": "wav"} + }' ``` + + @@ -948,62 +1001,9 @@ Expected Response: {"data":[{"id":"batch_R3V...} ``` -## O-Series Models -Azure OpenAI O-Series models are supported on LiteLLM. -LiteLLM routes any deployment name with `o1` or `o3` in the model name, to the O-Series [transformation](https://github.com/BerriAI/litellm/blob/91ed05df2962b8eee8492374b048d27cc144d08c/litellm/llms/azure/chat/o1_transformation.py#L4) logic. -To set this explicitly, set `model` to `azure/o_series/`. 
- -**Automatic Routing** - - - - -```python -import litellm - -litellm.completion(model="azure/my-o3-deployment", messages=[{"role": "user", "content": "Hello, world!"}]) # 👈 Note: 'o3' in the deployment name -``` - - - -```yaml -model_list: - - model_name: o3-mini - litellm_params: - model: azure/o3-model - api_base: os.environ/AZURE_API_BASE - api_key: os.environ/AZURE_API_KEY -``` - - - - -**Explicit Routing** - - - - -```python -import litellm - -litellm.completion(model="azure/o_series/my-random-deployment-name", messages=[{"role": "user", "content": "Hello, world!"}]) # 👈 Note: 'o_series/' in the deployment name -``` - - - -```yaml -model_list: - - model_name: o3-mini - litellm_params: - model: azure/o_series/my-random-deployment-name - api_base: os.environ/AZURE_API_BASE - api_key: os.environ/AZURE_API_KEY -``` - - diff --git a/docs/my-website/docs/providers/bedrock.md b/docs/my-website/docs/providers/bedrock.md index 45ad3f0c61..58b44b04f2 100644 --- a/docs/my-website/docs/providers/bedrock.md +++ b/docs/my-website/docs/providers/bedrock.md @@ -1428,10 +1428,14 @@ response = litellm.embedding( ## Supported AWS Bedrock Models + +LiteLLM supports ALL Bedrock models. + Here's an example of using a bedrock model with LiteLLM. For a complete list, refer to the [model cost map](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json) | Model Name | Command | |----------------------------|------------------------------------------------------------------| +| Deepseek R1 | `completion(model='bedrock/us.deepseek.r1-v1:0', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']` | | Anthropic Claude-V3.5 Sonnet | `completion(model='bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']` | | Anthropic Claude-V3 sonnet | `completion(model='bedrock/anthropic.claude-3-sonnet-20240229-v1:0', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']` | | Anthropic Claude-V3 Haiku | `completion(model='bedrock/anthropic.claude-3-haiku-20240307-v1:0', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']` | diff --git a/docs/my-website/docs/providers/ollama.md b/docs/my-website/docs/providers/ollama.md index 848be2beb7..21dfde9982 100644 --- a/docs/my-website/docs/providers/ollama.md +++ b/docs/my-website/docs/providers/ollama.md @@ -202,6 +202,67 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \ + +## Using Ollama FIM on `/v1/completions` + +LiteLLM supports calling Ollama's `/api/generate` endpoint on `/v1/completions` requests. + + + + +```python +import litellm +litellm._turn_on_debug() # turn on debug to see the request +from litellm import completion + +response = completion( + model="ollama/llama3.1", + prompt="Hello, world!", + api_base="http://localhost:11434" +) +print(response) +``` + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: "llama3.1" + litellm_params: + model: "ollama/llama3.1" + api_base: "http://localhost:11434" +``` + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml --detailed_debug + +# RUNNING ON http://0.0.0.0:4000 +``` + +3. Test it! 
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    api_key="anything",  # 👈 PROXY KEY (can be anything, if master_key not set)
+    base_url="http://0.0.0.0:4000"  # 👈 PROXY BASE URL
+)
+
+response = client.completions.create(
+    model="llama3.1",
+    prompt="Hello, world!"
+)
+print(response)
+```
+
+
 
 ## Using ollama `api/chat`
 
 In order to send ollama requests to `POST /api/chat` on your ollama server, set the model prefix to `ollama_chat`
diff --git a/docs/my-website/docs/providers/openai.md b/docs/my-website/docs/providers/openai.md
index 15661f6521..794f3da647 100644
--- a/docs/my-website/docs/providers/openai.md
+++ b/docs/my-website/docs/providers/openai.md
@@ -228,6 +228,92 @@ response = completion(
 ```
 
+## PDF File Parsing
+
+OpenAI has a new `file` message type that allows you to pass in a PDF file and have it parsed into a structured output. [Read more](https://platform.openai.com/docs/guides/pdf-files?api-mode=chat&lang=python)
+
+
+
+```python
+import base64
+from litellm import completion
+
+with open("draconomicon.pdf", "rb") as f:
+    data = f.read()
+
+base64_string = base64.b64encode(data).decode("utf-8")
+
+response = completion(
+    model="gpt-4o",
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "file",
+                    "file": {
+                        "filename": "draconomicon.pdf",
+                        "file_data": f"data:application/pdf;base64,{base64_string}",
+                    }
+                },
+                {
+                    "type": "text",
+                    "text": "What is the first dragon in the book?",
+                }
+            ],
+        },
+    ],
+)
+
+print(response.choices[0].message.content)
+```
+
+
+
+1. Setup config.yaml
+
+```yaml
+model_list:
+  - model_name: openai-model
+    litellm_params:
+      model: gpt-4o
+      api_key: os.environ/OPENAI_API_KEY
+```
+
+2. Start the proxy
+
+```bash
+litellm --config config.yaml
+```
+
+3. Test it!
+ +```bash +curl -X POST 'http://0.0.0.0:4000/chat/completions' \ +-H 'Content-Type: application/json' \ +-H 'Authorization: Bearer sk-1234' \ +-d '{ + "model": "openai-model", + "messages": [ + {"role": "user", "content": [ + { + "type": "file", + "file": { + "filename": "draconomicon.pdf", + "file_data": f"data:application/pdf;base64,{base64_string}", + } + } + ]} + ] +}' +``` + + + + ## OpenAI Fine Tuned Models | Model Name | Function Call | @@ -449,26 +535,6 @@ response = litellm.acompletion( ) ``` -### Using Helicone Proxy with LiteLLM -```python -import os -import litellm -from litellm import completion - -os.environ["OPENAI_API_KEY"] = "" - -# os.environ["OPENAI_API_BASE"] = "" -litellm.api_base = "https://oai.hconeai.com/v1" -litellm.headers = { - "Helicone-Auth": f"Bearer {os.getenv('HELICONE_API_KEY')}", - "Helicone-Cache-Enabled": "true", -} - -messages = [{ "content": "Hello, how are you?","role": "user"}] - -# openai call -response = completion("gpt-3.5-turbo", messages) -``` ### Using OpenAI Proxy with LiteLLM ```python diff --git a/docs/my-website/docs/providers/openrouter.md b/docs/my-website/docs/providers/openrouter.md index 09669c9f9e..58a87f6849 100644 --- a/docs/my-website/docs/providers/openrouter.md +++ b/docs/my-website/docs/providers/openrouter.md @@ -10,9 +10,11 @@ LiteLLM supports all the text / chat / vision models from [OpenRouter](https://o import os from litellm import completion os.environ["OPENROUTER_API_KEY"] = "" +os.environ["OPENROUTER_API_BASE"] = "" # [OPTIONAL] defaults to https://openrouter.ai/api/v1 -os.environ["OR_SITE_URL"] = "" # optional -os.environ["OR_APP_NAME"] = "" # optional + +os.environ["OR_SITE_URL"] = "" # [OPTIONAL] +os.environ["OR_APP_NAME"] = "" # [OPTIONAL] response = completion( model="openrouter/google/palm-2-chat-bison", diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md index 71ea045fb1..0e85223d45 100644 --- a/docs/my-website/docs/proxy/config_settings.md +++ b/docs/my-website/docs/proxy/config_settings.md @@ -147,6 +147,7 @@ general_settings: |------|------|-------------| | completion_model | string | The default model to use for completions when `model` is not specified in the request | | disable_spend_logs | boolean | If true, turns off writing each transaction to the database | +| disable_spend_updates | boolean | If true, turns off all spend updates to the DB. Including key/user/team spend updates. | | disable_master_key_return | boolean | If true, turns off returning master key on UI. (checked on '/user/info' endpoint) | | disable_retry_on_max_parallel_request_limit_error | boolean | If true, turns off retries when max parallel request limit is reached | | disable_reset_budget | boolean | If true, turns off reset budget scheduled task | diff --git a/docs/my-website/docs/proxy/logging_spec.md b/docs/my-website/docs/proxy/logging_spec.md index 7da937e565..b314dd350b 100644 --- a/docs/my-website/docs/proxy/logging_spec.md +++ b/docs/my-website/docs/proxy/logging_spec.md @@ -79,6 +79,7 @@ Inherits from `StandardLoggingUserAPIKeyMetadata` and adds: | `response_cost` | `Optional[str]` | Optional response cost | | `additional_headers` | `Optional[StandardLoggingAdditionalHeaders]` | Additional headers | | `batch_models` | `Optional[List[str]]` | Only set for Batches API. 
Lists the models used for cost calculation |
+| `litellm_model_name` | `Optional[str]` | Model name sent in request |
 
 ## StandardLoggingModelInformation
 
diff --git a/docs/my-website/docs/proxy/response_headers.md b/docs/my-website/docs/proxy/response_headers.md
index b07f82d780..32f09fab42 100644
--- a/docs/my-website/docs/proxy/response_headers.md
+++ b/docs/my-website/docs/proxy/response_headers.md
@@ -43,19 +43,19 @@ These headers are useful for clients to understand the current rate limit status
 | `x-litellm-max-fallbacks` | int | Maximum number of fallback attempts allowed |
 
 ## Cost Tracking Headers
-| Header | Type | Description |
-|--------|------|-------------|
-| `x-litellm-response-cost` | float | Cost of the API call |
-| `x-litellm-key-spend` | float | Total spend for the API key |
+| Header | Type | Description | Available on Pass-Through Endpoints |
+|--------|------|-------------|-------------|
+| `x-litellm-response-cost` | float | Cost of the API call | |
+| `x-litellm-key-spend` | float | Total spend for the API key | ✅ |
 
 ## LiteLLM Specific Headers
-| Header | Type | Description |
-|--------|------|-------------|
-| `x-litellm-call-id` | string | Unique identifier for the API call |
-| `x-litellm-model-id` | string | Unique identifier for the model used |
-| `x-litellm-model-api-base` | string | Base URL of the API endpoint |
-| `x-litellm-version` | string | Version of LiteLLM being used |
-| `x-litellm-model-group` | string | Model group identifier |
+| Header | Type | Description | Available on Pass-Through Endpoints |
+|--------|------|-------------|-------------|
+| `x-litellm-call-id` | string | Unique identifier for the API call | ✅ |
+| `x-litellm-model-id` | string | Unique identifier for the model used | |
+| `x-litellm-model-api-base` | string | Base URL of the API endpoint | ✅ |
+| `x-litellm-version` | string | Version of LiteLLM being used | |
+| `x-litellm-model-group` | string | Model group identifier | |
 
 ## Response headers from LLM providers
diff --git a/docs/my-website/img/arize.png b/docs/my-website/img/arize.png
new file mode 100644
index 0000000000..45d6dacda9
Binary files /dev/null and b/docs/my-website/img/arize.png differ
diff --git a/docs/my-website/release_notes/v1.63.11-stable/index.md b/docs/my-website/release_notes/v1.63.11-stable/index.md
index f502420507..91336fc681 100644
--- a/docs/my-website/release_notes/v1.63.11-stable/index.md
+++ b/docs/my-website/release_notes/v1.63.11-stable/index.md
@@ -26,14 +26,6 @@ This release is primarily focused on:
 - UI - Credential Management, re-use credentials when adding new models
 - UI - Test Connection to LLM Provider before adding a model
 
-:::info
-
-This release will be live on 03/16/2025
-
-:::
-
-
-
 ## Known Issues
 
 - 🚨 Known issue on Azure OpenAI - We don't recommend upgrading if you use Azure OpenAI. This version failed our Azure OpenAI load test
diff --git a/docs/my-website/release_notes/v1.63.14/index.md b/docs/my-website/release_notes/v1.63.14/index.md
new file mode 100644
index 0000000000..f30231604a
--- /dev/null
+++ b/docs/my-website/release_notes/v1.63.14/index.md
@@ -0,0 +1,131 @@
+---
+title: v1.63.14-stable
+slug: v1.63.14-stable
+date: 2025-03-22T10:00:00
+authors:
+  - name: Krrish Dholakia
+    title: CEO, LiteLLM
+    url: https://www.linkedin.com/in/krish-d/
+    image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
+  - name: Ishaan Jaffer
+    title: CTO, LiteLLM
+    url: https://www.linkedin.com/in/reffajnaahsi/
+    image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
+
+tags: [credential management, thinking content, responses api, snowflake]
+hide_table_of_contents: false
+---
+
+import Image from '@theme/IdealImage';
+
+These are the changes since `v1.63.11-stable`.
+
+This release brings:
+- LLM Translation Improvements (MCP Support and Bedrock Application Profiles)
+- Perf improvements for Usage-based Routing
+- Streaming guardrail support via websockets
+
+## Docker Run LiteLLM Proxy
+
+```
+docker run \
+-e STORE_MODEL_IN_DB=True \
+-p 4000:4000 \
+ghcr.io/berriai/litellm:main-v1.63.14-stable
+```
+
+## Demo Instance
+
+Here's a Demo Instance to test changes:
+- Instance: https://demo.litellm.ai/
+- Login Credentials:
+    - Username: admin
+    - Password: sk-1234
+
+## New Models / Updated Models
+
+- Azure gpt-4o - fixed pricing to latest global pricing - [PR](https://github.com/BerriAI/litellm/pull/9361)
+- O1-Pro - add pricing + model information - [PR](https://github.com/BerriAI/litellm/pull/9397)
+- Azure AI - mistral 3.1 small pricing added - [PR](https://github.com/BerriAI/litellm/pull/9453)
+- Azure - gpt-4.5-preview pricing added - [PR](https://github.com/BerriAI/litellm/pull/9453)
+
+## LLM Translation
+
+1. **New LLM Features**
+
+- Bedrock: Support bedrock application inference profiles [Docs](https://docs.litellm.ai/docs/providers/bedrock#bedrock-application-inference-profile)
+    - Infer aws region from bedrock application profile id - (`arn:aws:bedrock:us-east-1:...`)
+- Ollama - support calling via `/v1/completions` [Get Started](../../docs/providers/ollama#using-ollama-fim-on-v1completions)
+- Bedrock - support `us.deepseek.r1-v1:0` model name [Docs](../../docs/providers/bedrock#supported-aws-bedrock-models)
+
+https://github.com/BerriAI/litellm/pull/9363
+- OpenRouter - `OPENROUTER_API_BASE` env var support [Docs](../../docs/providers/openrouter.md)
+- Azure - add audio model parameter support - [Docs](../../docs/providers/azure#azure-audio-model)
+- OpenAI - PDF File support [Docs](../../docs/completion/document_understanding#openai-file-message-type)
+- OpenAI - o1-pro Responses API streaming support [Docs](../../docs/response_api.md#streaming)
+- [BETA] MCP - Use MCP Tools with LiteLLM SDK [Docs](../../docs/mcp)
+
+2. **Bug Fixes**
+
+- Voyage: prompt token on embedding tracking fix - [PR](https://github.com/BerriAI/litellm/commit/56d3e75b330c3c3862dc6e1c51c1210e48f1068e)
+- Streaming - Prevents final chunk w/ usage from being ignored (impacted bedrock streaming + cost tracking) - [PR](https://github.com/BerriAI/litellm/commit/dd2c980d5bb9e1a3b125e364c5d841751e67c96d)
+- Sagemaker - Fix ‘Too little data for declared Content-Length’ error - [PR](https://github.com/BerriAI/litellm/pull/9326)
+- OpenAI-compatible models - fix issue when calling openai-compatible models w/ custom_llm_provider set - [PR](https://github.com/BerriAI/litellm/pull/9355)
+- VertexAI - Embedding ‘outputDimensionality’ support - [PR](https://github.com/BerriAI/litellm/commit/437dbe724620675295f298164a076cbd8019d304)
+- Anthropic - return consistent json response format on streaming/non-streaming - [PR](https://github.com/BerriAI/litellm/pull/9437)
+
+## Spend Tracking Improvements
+
+- `litellm_proxy/` - support reading litellm response cost header from proxy, when using client sdk
+- Reset Budget Job - fix budget reset error on keys/teams/users - [PR](https://github.com/BerriAI/litellm/pull/9329)
+
+## UI
+
+1. Users Page
+    - Feature: Control default internal user settings [PR](https://github.com/BerriAI/litellm/pull/9374)
+2. Icons:
+    - Feature: Replace external "artificialanalysis.ai" icons by local svg [PR](https://github.com/BerriAI/litellm/pull/9374)
+3. Sign In/Sign Out
+    - Fix: Default login when `default_user_id` user does not exist in DB [PR](https://github.com/BerriAI/litellm/pull/9395)
+
+## Logging Integrations
+
+- Support post-call guardrails for streaming responses - https://github.com/BerriAI/litellm/commit/4a31b32a88b7729a032e58ab046079d17000087f [NEEDS DOCS]
+- Arize [Get Started](../../docs/observability/arize_integration)
+    - fix invalid package import [PR](https://github.com/BerriAI/litellm/pull/9338)
+    - migrate to using standardloggingpayload for metadata, ensures spans land successfully [PR](https://github.com/BerriAI/litellm/pull/9338)
+    - fix logging to just log the LLM I/O [PR](https://github.com/BerriAI/litellm/pull/9353)
+    - Dynamic API Key/Space param support [Get Started](../../docs/observability/arize_integration#pass-arize-spacekey-per-request)
+- StandardLoggingPayload - Log litellm_model_name in payload. Allows knowing what model name was sent to the API provider [Get Started](../../docs/proxy/logging_spec#standardlogginghiddenparams)
+- Prompt Management - Allow building custom prompt management integration [Get Started](../../docs/proxy/custom_prompt_management.md)
+
+## Performance / Reliability improvements
+
+- Redis Caching - add 5s default timeout, prevents hanging redis connection from impacting llm calls [PR](https://github.com/BerriAI/litellm/commit/db92956ae33ed4c4e3233d7e1b0c7229817159bf)
+- Allow disabling all spend updates / writes to DB - patch to allow disabling all spend updates to DB with a flag [PR](https://github.com/BerriAI/litellm/pull/9331)
+- Azure OpenAI - correctly re-use azure openai client, fixes perf issue from previous Stable release [PR](https://github.com/BerriAI/litellm/commit/f2026ef907c06d94440930917add71314b901413)
+- Azure OpenAI - uses litellm.ssl_verify on Azure/OpenAI clients [PR](https://github.com/BerriAI/litellm/commit/f2026ef907c06d94440930917add71314b901413)
+- Usage-based routing - Wildcard model support [Get Started](../../docs/proxy/usage_based_routing#wildcard-model-support)
+- Usage-based routing - Support batch writing increments to redis - reduces latency to same as ‘simple-shuffle’ [PR](https://github.com/BerriAI/litellm/pull/9357)
+- Router - show reason for model cooldown on ‘no healthy deployments available error’ [PR](https://github.com/BerriAI/litellm/pull/9438)
+- Caching - add max value limit to an item in in-memory cache (1MB) - prevents OOM errors on large image URLs being sent through proxy [PR](https://github.com/BerriAI/litellm/pull/9448)
+
+## General Improvements
+
+- Passthrough Endpoints - support returning api-base on pass-through endpoints Response Headers [Docs](../../docs/proxy/response_headers#litellm-specific-headers)
+- SSL - support reading ssl security level from env var - Allows user to specify lower security settings [Get Started](../../docs/guides/security_settings)
+- Credentials - only poll Credentials table when `STORE_MODEL_IN_DB` is True [PR](https://github.com/BerriAI/litellm/pull/9376)
+- Image URL Handling - new architecture doc on image url handling [Docs](../../docs/proxy/image_handling)
+- OpenAI - bump to pip install "openai==1.68.2" [PR](https://github.com/BerriAI/litellm/commit/e85e3bc52a9de86ad85c3dbb12d87664ee567a5a)
+- Gunicorn - security fix - bump gunicorn==23.0.0 [PR](https://github.com/BerriAI/litellm/commit/7e9fc92f5c7fea1e7294171cd3859d55384166eb)
+
+## Complete Git Diff
+
+[Here's the complete git diff](https://github.com/BerriAI/litellm/compare/v1.63.11-stable...v1.63.14.rc)
\ No newline at end of file
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index b0f6db7c44..5aeee715d1 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -243,6 +243,7 @@ const sidebars = {
         "exception_mapping",
         "completion/provider_specific_params",
         "guides/finetuned_models",
+        "guides/security_settings",
         "completion/audio",
         "completion/web_search",
         "completion/document_understanding",