diff --git a/docs/my-website/docs/providers/azure.md b/docs/my-website/docs/providers/azure.md index 2cfb92546a..dfb7761822 100644 --- a/docs/my-website/docs/providers/azure.md +++ b/docs/my-website/docs/providers/azure.md @@ -1076,32 +1076,24 @@ print(response) ``` -### Parallel Function calling +### Tool Calling / Function Calling + See a detailed walthrough of parallel function calling with litellm [here](https://docs.litellm.ai/docs/completion/function_call) + + + + + ```python # set Azure env variables import os +import litellm +import json + os.environ['AZURE_API_KEY'] = "" # litellm reads AZURE_API_KEY from .env and sends the request os.environ['AZURE_API_BASE'] = "https://openai-gpt-4-test-v-1.openai.azure.com/" os.environ['AZURE_API_VERSION'] = "2023-07-01-preview" -import litellm -import json -# Example dummy function hard coded to return the same weather -# In production, this could be your backend API or an external API -def get_current_weather(location, unit="fahrenheit"): - """Get the current weather in a given location""" - if "tokyo" in location.lower(): - return json.dumps({"location": "Tokyo", "temperature": "10", "unit": "celsius"}) - elif "san francisco" in location.lower(): - return json.dumps({"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"}) - elif "paris" in location.lower(): - return json.dumps({"location": "Paris", "temperature": "22", "unit": "celsius"}) - else: - return json.dumps({"location": location, "temperature": "unknown"}) - -## Step 1: send the conversation and available functions to the model -messages = [{"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris?"}] tools = [ { "type": "function", @@ -1125,7 +1117,7 @@ tools = [ response = litellm.completion( model="azure/chatgpt-functioncalling", # model = azure/ - messages=messages, + messages=[{"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris?"}], tools=tools, tool_choice="auto", # auto 
is default, but we'll be explicit ) @@ -1134,8 +1126,49 @@ response_message = response.choices[0].message tool_calls = response.choices[0].message.tool_calls print("\nTool Choice:\n", tool_calls) ``` + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: azure-gpt-3.5 + litellm_params: + model: azure/chatgpt-functioncalling + api_base: os.environ/AZURE_API_BASE + api_key: os.environ/AZURE_API_KEY + api_version: "2023-07-01-preview" +``` + +2. Start proxy + +```bash +litellm --config config.yaml +``` + +3. Test it + +```bash +curl -L -X POST 'http://localhost:4000/v1/chat/completions' \ +-H 'Content-Type: application/json' \ +-H 'Authorization: Bearer sk-1234' \ +-d '{ + "model": "azure-gpt-3.5", + "messages": [ + { + "role": "user", + "content": "Hey, how'\''s it going? Thinking long and hard before replying - what is the meaning of the world and life itself" + } + ] +}' +``` + + + + ### Spend Tracking for Azure OpenAI Models (PROXY) Set base model for cost tracking azure image-gen call diff --git a/docs/my-website/docs/providers/databricks.md b/docs/my-website/docs/providers/databricks.md index 395a544db4..8631cbfdad 100644 --- a/docs/my-website/docs/providers/databricks.md +++ b/docs/my-website/docs/providers/databricks.md @@ -1,7 +1,7 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# 🆕 Databricks +# Databricks LiteLLM supports all models on Databricks @@ -154,7 +154,205 @@ response = completion( temperature: 0.5 ``` -## Passings Databricks specific params - 'instruction' + +## Usage - Thinking / `reasoning_content` + +LiteLLM translates OpenAI's `reasoning_effort` to Anthropic's `thinking` parameter. 
[Code](https://github.com/BerriAI/litellm/blob/23051d89dd3611a81617d84277059cd88b2df511/litellm/llms/anthropic/chat/transformation.py#L298) + +| reasoning_effort | thinking | +| ---------------- | -------- | +| "low" | "budget_tokens": 1024 | +| "medium" | "budget_tokens": 2048 | +| "high" | "budget_tokens": 4096 | + + +Known Limitations: +- Support for passing thinking blocks back to Claude [Issue](https://github.com/BerriAI/litellm/issues/9790) + + + + + +```python +from litellm import completion +import os + +# set ENV variables (can also be passed in to .completion() - e.g. `api_base`, `api_key`) +os.environ["DATABRICKS_API_KEY"] = "databricks key" +os.environ["DATABRICKS_API_BASE"] = "databricks base url" + +resp = completion( + model="databricks/databricks-claude-3-7-sonnet", + messages=[{"role": "user", "content": "What is the capital of France?"}], + reasoning_effort="low", +) + +``` + + + + + +1. Setup config.yaml + +```yaml +- model_name: claude-3-7-sonnet + litellm_params: + model: databricks/databricks-claude-3-7-sonnet + api_key: os.environ/DATABRICKS_API_KEY + api_base: os.environ/DATABRICKS_API_BASE +``` + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! 
+ +```bash +curl http://0.0.0.0:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer " \ + -d '{ + "model": "claude-3-7-sonnet", + "messages": [{"role": "user", "content": "What is the capital of France?"}], + "reasoning_effort": "low" + }' +``` + + + + + +**Expected Response** + +```python +ModelResponse( + id='chatcmpl-c542d76d-f675-4e87-8e5f-05855f5d0f5e', + created=1740470510, + model='claude-3-7-sonnet-20250219', + object='chat.completion', + system_fingerprint=None, + choices=[ + Choices( + finish_reason='stop', + index=0, + message=Message( + content="The capital of France is Paris.", + role='assistant', + tool_calls=None, + function_call=None, + provider_specific_fields={ + 'citations': None, + 'thinking_blocks': [ + { + 'type': 'thinking', + 'thinking': 'The capital of France is Paris. This is a very straightforward factual question.', + 'signature': 'EuYBCkQYAiJAy6...' + } + ] + } + ), + thinking_blocks=[ + { + 'type': 'thinking', + 'thinking': 'The capital of France is Paris. This is a very straightforward factual question.', + 'signature': 'EuYBCkQYAiJAy6AGB...' + } + ], + reasoning_content='The capital of France is Paris. This is a very straightforward factual question.' + ) + ], + usage=Usage( + completion_tokens=68, + prompt_tokens=42, + total_tokens=110, + completion_tokens_details=None, + prompt_tokens_details=PromptTokensDetailsWrapper( + audio_tokens=None, + cached_tokens=0, + text_tokens=None, + image_tokens=None + ), + cache_creation_input_tokens=0, + cache_read_input_tokens=0 + ) +) +``` + +### Pass `thinking` to Anthropic models + +You can also pass the `thinking` parameter to Anthropic models. + + +You can also pass the `thinking` parameter to Anthropic models. + + + + +```python +from litellm import completion +import os + +# set ENV variables (can also be passed in to .completion() - e.g. 
`api_base`, `api_key`) +os.environ["DATABRICKS_API_KEY"] = "databricks key" +os.environ["DATABRICKS_API_BASE"] = "databricks base url" + +response = litellm.completion( + model="databricks/databricks-claude-3-7-sonnet", + messages=[{"role": "user", "content": "What is the capital of France?"}], + thinking={"type": "enabled", "budget_tokens": 1024}, +) +``` + + + + +```bash +curl http://0.0.0.0:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $LITELLM_KEY" \ + -d '{ + "model": "databricks/databricks-claude-3-7-sonnet", + "messages": [{"role": "user", "content": "What is the capital of France?"}], + "thinking": {"type": "enabled", "budget_tokens": 1024} + }' +``` + + + + + + + + +## Supported Databricks Chat Completion Models + +:::tip + +**We support ALL Databricks models, just set `model=databricks/` as a prefix when sending litellm requests** + +::: + + +| Model Name | Command | +|----------------------------|------------------------------------------------------------------| +| databricks/databricks-claude-3-7-sonnet | `completion(model='databricks/databricks/databricks-claude-3-7-sonnet', messages=messages)` | +| databricks-meta-llama-3-1-70b-instruct | `completion(model='databricks/databricks-meta-llama-3-1-70b-instruct', messages=messages)` | +| databricks-meta-llama-3-1-405b-instruct | `completion(model='databricks/databricks-meta-llama-3-1-405b-instruct', messages=messages)` | +| databricks-dbrx-instruct | `completion(model='databricks/databricks-dbrx-instruct', messages=messages)` | +| databricks-meta-llama-3-70b-instruct | `completion(model='databricks/databricks-meta-llama-3-70b-instruct', messages=messages)` | +| databricks-llama-2-70b-chat | `completion(model='databricks/databricks-llama-2-70b-chat', messages=messages)` | +| databricks-mixtral-8x7b-instruct | `completion(model='databricks/databricks-mixtral-8x7b-instruct', messages=messages)` | +| databricks-mpt-30b-instruct | 
`completion(model='databricks/databricks-mpt-30b-instruct', messages=messages)` | +| databricks-mpt-7b-instruct | `completion(model='databricks/databricks-mpt-7b-instruct', messages=messages)` | + + +## Embedding Models + +### Passing Databricks specific params - 'instruction' For embedding models, databricks lets you pass in an additional param 'instruction'. [Full Spec](https://github.com/BerriAI/litellm/blob/43353c28b341df0d9992b45c6ce464222ebd7984/litellm/llms/databricks.py#L164) @@ -187,27 +385,6 @@ response = litellm.embedding( instruction: "Represent this sentence for searching relevant passages:" ``` - -## Supported Databricks Chat Completion Models - -:::tip - -**We support ALL Databricks models, just set `model=databricks/` as a prefix when sending litellm requests** - -::: - - -| Model Name | Command | -|----------------------------|------------------------------------------------------------------| -| databricks-meta-llama-3-1-70b-instruct | `completion(model='databricks/databricks-meta-llama-3-1-70b-instruct', messages=messages)` | -| databricks-meta-llama-3-1-405b-instruct | `completion(model='databricks/databricks-meta-llama-3-1-405b-instruct', messages=messages)` | -| databricks-dbrx-instruct | `completion(model='databricks/databricks-dbrx-instruct', messages=messages)` | -| databricks-meta-llama-3-70b-instruct | `completion(model='databricks/databricks-meta-llama-3-70b-instruct', messages=messages)` | -| databricks-llama-2-70b-chat | `completion(model='databricks/databricks-llama-2-70b-chat', messages=messages)` | -| databricks-mixtral-8x7b-instruct | `completion(model='databricks/databricks-mixtral-8x7b-instruct', messages=messages)` | -| databricks-mpt-30b-instruct | `completion(model='databricks/databricks-mpt-30b-instruct', messages=messages)` | -| databricks-mpt-7b-instruct | `completion(model='databricks/databricks-mpt-7b-instruct', messages=messages)` | - ## Supported Databricks Embedding Models :::tip diff --git 
a/docs/my-website/docs/providers/gemini.md b/docs/my-website/docs/providers/gemini.md index 67eca21c65..42783286f1 100644 --- a/docs/my-website/docs/providers/gemini.md +++ b/docs/my-website/docs/providers/gemini.md @@ -887,3 +887,54 @@ response = await client.chat.completions.create( + +## Image Generation + + + + +```python +from litellm import completion + +response = completion( + model="gemini/gemini-2.0-flash-exp-image-generation", + messages=[{"role": "user", "content": "Generate an image of a cat"}], + modalities=["image", "text"], +) +assert response.choices[0].message.content is not None # "data:image/png;base64,e4rr.." +``` + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: gemini-2.0-flash-exp-image-generation + litellm_params: + model: gemini/gemini-2.0-flash-exp-image-generation + api_key: os.environ/GEMINI_API_KEY +``` + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! + +```bash +curl -L -X POST 'http://localhost:4000/v1/chat/completions' \ +-H 'Content-Type: application/json' \ +-H 'Authorization: Bearer sk-1234' \ +-d '{ + "model": "gemini-2.0-flash-exp-image-generation", + "messages": [{"role": "user", "content": "Generate an image of a cat"}], + "modalities": ["image", "text"] +}' +``` + + + + diff --git a/docs/my-website/docs/providers/google_ai_studio/files.md b/docs/my-website/docs/providers/google_ai_studio/files.md new file mode 100644 index 0000000000..500f1d5718 --- /dev/null +++ b/docs/my-website/docs/providers/google_ai_studio/files.md @@ -0,0 +1,161 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# [BETA] Google AI Studio (Gemini) Files API + +Use this to upload files to Google AI Studio (Gemini). + +Useful to pass in large media files to Gemini's `/generateContent` endpoint. 
+ +| Action | Supported | +|----------|-----------| +| `create` | Yes | +| `delete` | No | +| `retrieve` | No | +| `list` | No | + +## Usage + + + + +```python +import base64 +import requests +from litellm import completion, create_file +import os + + +### UPLOAD FILE ### + +# Fetch the audio file and convert it to a base64 encoded string +url = "https://cdn.openai.com/API/docs/audio/alloy.wav" +response = requests.get(url) +response.raise_for_status() +wav_data = response.content +encoded_string = base64.b64encode(wav_data).decode('utf-8') + + +file = create_file( + file=wav_data, + purpose="user_data", + extra_body={"custom_llm_provider": "gemini"}, + api_key=os.getenv("GEMINI_API_KEY"), +) + +print(f"file: {file}") + +assert file is not None + + +### GENERATE CONTENT ### +completion = completion( + model="gemini-2.0-flash", + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What is in this recording?" + }, + { + "type": "file", + "file": { + "file_id": file.id, + "filename": "my-test-name", + "format": "audio/wav" + } + } + ] + }, + ] +) + +print(completion.choices[0].message) +``` + + + + +1. Setup config.yaml + +```yaml +model_list: + - model_name: "gemini-2.0-flash" + litellm_params: + model: gemini/gemini-2.0-flash + api_key: os.environ/GEMINI_API_KEY +``` + +2. Start proxy + +```bash +litellm --config config.yaml +``` + +3. 
Test it + +```python +import base64 +import requests +from openai import OpenAI + +client = OpenAI( + base_url="http://0.0.0.0:4000", + api_key="sk-1234" +) + +# Fetch the audio file and convert it to a base64 encoded string +url = "https://cdn.openai.com/API/docs/audio/alloy.wav" +response = requests.get(url) +response.raise_for_status() +wav_data = response.content +encoded_string = base64.b64encode(wav_data).decode('utf-8') + + +file = client.files.create( + file=wav_data, + purpose="user_data", + extra_body={"target_model_names": "gemini-2.0-flash"} +) + +print(f"file: {file}") + +assert file is not None + +completion = client.chat.completions.create( + model="gemini-2.0-flash", + modalities=["text", "audio"], + audio={"voice": "alloy", "format": "wav"}, + messages=[ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What is in this recording?" + }, + { + "type": "file", + "file": { + "file_id": file.id, + "filename": "my-test-name", + "format": "audio/wav" + } + } + ] + }, + ], + extra_body={"drop_params": True} +) + +print(completion.choices[0].message) +``` + + + + + + + diff --git a/docs/my-website/docs/proxy/cost_tracking.md b/docs/my-website/docs/proxy/cost_tracking.md index 7f90273c39..784de1da81 100644 --- a/docs/my-website/docs/proxy/cost_tracking.md +++ b/docs/my-website/docs/proxy/cost_tracking.md @@ -6,6 +6,8 @@ import Image from '@theme/IdealImage'; Track spend for keys, users, and teams across 100+ LLMs. +LiteLLM automatically tracks spend for all known models. 
See our [model cost map](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json) + ### How to Track Spend with LiteLLM **Step 1** @@ -35,10 +37,10 @@ response = client.chat.completions.create( "content": "this is a test request, write a short poem" } ], - user="palantir", - extra_body={ + user="palantir", # OPTIONAL: pass user to track spend by user + extra_body={ "metadata": { - "tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"] + "tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"] # ENTERPRISE: pass tags to track spend by tags } } ) @@ -63,9 +65,9 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \ "content": "what llm are you" } ], - "user": "palantir", + "user": "palantir", # OPTIONAL: pass user to track spend by user "metadata": { - "tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"] + "tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"] # ENTERPRISE: pass tags to track spend by tags } }' ``` @@ -90,7 +92,7 @@ chat = ChatOpenAI( user="palantir", extra_body={ "metadata": { - "tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"] + "tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"] # ENTERPRISE: pass tags to track spend by tags } } ) @@ -150,8 +152,134 @@ Navigate to the Usage Tab on the LiteLLM UI (found on https://your-proxy-endpoin -## ✨ (Enterprise) API Endpoints to get Spend -### Getting Spend Reports - To Charge Other Teams, Customers, Users +### Allowing Non-Proxy Admins to access `/spend` endpoints + +Use this when you want non-proxy admins to access `/spend` endpoints + +:::info + +Schedule a [meeting with us to get your Enterprise License](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat) + +::: + +##### Create Key +Create Key with with `permissions={"get_spend_routes": true}` +```shell +curl --location 'http://0.0.0.0:4000/key/generate' \ + --header 'Authorization: Bearer sk-1234' \ + --header 
'Content-Type: application/json' \ + --data '{ + "permissions": {"get_spend_routes": true} + }' +``` + +##### Use generated key on `/spend` endpoints + +Access spend Routes with newly generated keys +```shell +curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30' \ + -H 'Authorization: Bearer sk-H16BKvrSNConSsBYLGc_7A' +``` + + + +#### Reset Team, API Key Spend - MASTER KEY ONLY + +Use `/global/spend/reset` if you want to: +- Reset the Spend for all API Keys, Teams. The `spend` for ALL Teams and Keys in `LiteLLM_TeamTable` and `LiteLLM_VerificationToken` will be set to `spend=0` + +- LiteLLM will maintain all the logs in `LiteLLMSpendLogs` for Auditing Purposes + +##### Request +Only the `LITELLM_MASTER_KEY` you set can access this route +```shell +curl -X POST \ + 'http://localhost:4000/global/spend/reset' \ + -H 'Authorization: Bearer sk-1234' \ + -H 'Content-Type: application/json' +``` + +##### Expected Responses + +```shell +{"message":"Spend for all API Keys and Teams reset successfully","status":"success"} +``` + + +## Set 'base_model' for Cost Tracking (e.g. Azure deployments) + +**Problem**: Azure returns `gpt-4` in the response when `azure/gpt-4-1106-preview` is used. This leads to inaccurate cost tracking + +**Solution** ✅ : Set `base_model` on your config so litellm uses the correct model for calculating azure cost + +Get the base model name from [here](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json) + +Example config with `base_model` +```yaml +model_list: + - model_name: azure-gpt-3.5 + litellm_params: + model: azure/chatgpt-v-2 + api_base: os.environ/AZURE_API_BASE + api_key: os.environ/AZURE_API_KEY + api_version: "2023-07-01-preview" + model_info: + base_model: azure/gpt-4-1106-preview +``` + +## Daily Spend Breakdown API + +Retrieve granular daily usage data for a user (by model, provider, and API key) with a single endpoint. 
+ +Example Request: + +```shell title="Daily Spend Breakdown API" showLineNumbers +curl -L -X GET 'http://localhost:4000/user/daily/activity?start_date=2025-03-20&end_date=2025-03-27' \ +-H 'Authorization: Bearer sk-...' +``` + +```json title="Daily Spend Breakdown API Response" showLineNumbers +{ + "results": [ + { + "date": "2025-03-27", + "metrics": { + "spend": 0.0177072, + "prompt_tokens": 111, + "completion_tokens": 1711, + "total_tokens": 1822, + "api_requests": 11 + }, + "breakdown": { + "models": { + "gpt-4o-mini": { + "spend": 1.095e-05, + "prompt_tokens": 37, + "completion_tokens": 9, + "total_tokens": 46, + "api_requests": 1 + }, + "providers": { "openai": { ... }, "azure_ai": { ... } }, + "api_keys": { "3126b6eaf1...": { ... } } + } + } + ], + "metadata": { + "total_spend": 0.7274667, + "total_prompt_tokens": 280990, + "total_completion_tokens": 376674, + "total_api_requests": 14 + } +} +``` + +### API Reference + +See our [Swagger API](https://litellm-api.up.railway.app/#/Budget%20%26%20Spend%20Tracking/get_user_daily_activity_user_daily_activity_get) for more details on the `/user/daily/activity` endpoint + +## ✨ (Enterprise) Generate Spend Reports + +Use this to charge other teams, customers, users Use the `/global/spend/report` endpoint to get spend reports @@ -470,105 +598,6 @@ curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end -### Allowing Non-Proxy Admins to access `/spend` endpoints - -Use this when you want non-proxy admins to access `/spend` endpoints - -:::info - -Schedule a [meeting with us to get your Enterprise License](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat) - -::: - -##### Create Key -Create Key with with `permissions={"get_spend_routes": true}` -```shell -curl --location 'http://0.0.0.0:4000/key/generate' \ - --header 'Authorization: Bearer sk-1234' \ - --header 'Content-Type: application/json' \ - --data '{ - "permissions": {"get_spend_routes": true} - }' -``` - -##### Use 
generated key on `/spend` endpoints - -Access spend Routes with newly generate keys -```shell -curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30' \ - -H 'Authorization: Bearer sk-H16BKvrSNConSsBYLGc_7A' -``` - - - -#### Reset Team, API Key Spend - MASTER KEY ONLY - -Use `/global/spend/reset` if you want to: -- Reset the Spend for all API Keys, Teams. The `spend` for ALL Teams and Keys in `LiteLLM_TeamTable` and `LiteLLM_VerificationToken` will be set to `spend=0` - -- LiteLLM will maintain all the logs in `LiteLLMSpendLogs` for Auditing Purposes - -##### Request -Only the `LITELLM_MASTER_KEY` you set can access this route -```shell -curl -X POST \ - 'http://localhost:4000/global/spend/reset' \ - -H 'Authorization: Bearer sk-1234' \ - -H 'Content-Type: application/json' -``` - -##### Expected Responses - -```shell -{"message":"Spend for all API Keys and Teams reset successfully","status":"success"} -``` - - - - -## Spend Tracking for Azure OpenAI Models - -Set base model for cost tracking azure image-gen call - -#### Image Generation - -```yaml -model_list: - - model_name: dall-e-3 - litellm_params: - model: azure/dall-e-3-test - api_version: 2023-06-01-preview - api_base: https://openai-gpt-4-test-v-1.openai.azure.com/ - api_key: os.environ/AZURE_API_KEY - base_model: dall-e-3 # 👈 set dall-e-3 as base model - model_info: - mode: image_generation -``` - -#### Chat Completions / Embeddings - -**Problem**: Azure returns `gpt-4` in the response when `azure/gpt-4-1106-preview` is used. 
This leads to inaccurate cost tracking - -**Solution** ✅ : Set `base_model` on your config so litellm uses the correct model for calculating azure cost - -Get the base model name from [here](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json) - -Example config with `base_model` -```yaml -model_list: - - model_name: azure-gpt-3.5 - litellm_params: - model: azure/chatgpt-v-2 - api_base: os.environ/AZURE_API_BASE - api_key: os.environ/AZURE_API_KEY - api_version: "2023-07-01-preview" - model_info: - base_model: azure/gpt-4-1106-preview -``` - -## Custom Input/Output Pricing - -👉 Head to [Custom Input/Output Pricing](https://docs.litellm.ai/docs/proxy/custom_pricing) to setup custom pricing or your models ## ✨ Custom Spend Log metadata @@ -587,4 +616,5 @@ Logging specific key,value pairs in spend logs metadata is an enterprise feature Tracking spend with Custom tags is an enterprise feature. [See here](./enterprise.md#tracking-spend-for-custom-tags) -::: \ No newline at end of file +::: + diff --git a/docs/my-website/docs/proxy/guardrails/aim_security.md b/docs/my-website/docs/proxy/guardrails/aim_security.md index 3138dc7d12..d76c4e0c1c 100644 --- a/docs/my-website/docs/proxy/guardrails/aim_security.md +++ b/docs/my-website/docs/proxy/guardrails/aim_security.md @@ -140,7 +140,7 @@ The above request should not be blocked, and you should receive a regular LLM re -# Advanced +## Advanced Aim Guard provides user-specific Guardrail policies, enabling you to apply tailored policies to individual users. To utilize this feature, include the end-user's email in the request payload by setting the `x-aim-user-email` header of your request. 
diff --git a/docs/my-website/docs/proxy/prod.md b/docs/my-website/docs/proxy/prod.md index 1c1cbeedb4..2d09502d52 100644 --- a/docs/my-website/docs/proxy/prod.md +++ b/docs/my-website/docs/proxy/prod.md @@ -177,6 +177,50 @@ export LITELLM_SALT_KEY="sk-1234" [**See Code**](https://github.com/BerriAI/litellm/blob/036a6821d588bd36d170713dcf5a72791a694178/litellm/proxy/common_utils/encrypt_decrypt_utils.py#L15) + +## 9. Use `prisma migrate deploy` + +Use this to handle db migrations across LiteLLM versions in production + + + + +```bash +USE_PRISMA_MIGRATE="True" +``` + + + + + +```bash +litellm --use_prisma_migrate +``` + + + + +Benefits: + +The migrate deploy command: + +- **Does not** issue a warning if an already applied migration is missing from migration history +- **Does not** detect drift (production database schema differs from migration history end state - for example, due to a hotfix) +- **Does not** reset the database or generate artifacts (such as Prisma Client) +- **Does not** rely on a shadow database + + +### How does LiteLLM handle DB migrations in production? + +1. A new migration file is written to our `litellm-proxy-extras` package. [See all](https://github.com/BerriAI/litellm/tree/main/litellm-proxy-extras/litellm_proxy_extras/migrations) + +2. The core litellm pip package is bumped to point to the new `litellm-proxy-extras` package. This ensures, older versions of LiteLLM will continue to use the old migrations. [See code](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/pyproject.toml#L58) + +3. When you upgrade to a new version of LiteLLM, the migration file is applied to the database. 
[See code](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/litellm-proxy-extras/litellm_proxy_extras/utils.py#L42) + + + + ## Extras ### Expected Performance in Production diff --git a/docs/my-website/img/release_notes/new_activity_tab.png b/docs/my-website/img/release_notes/new_activity_tab.png new file mode 100644 index 0000000000..e8cea22a90 Binary files /dev/null and b/docs/my-website/img/release_notes/new_activity_tab.png differ diff --git a/docs/my-website/release_notes/v1.55.10/index.md b/docs/my-website/release_notes/v1.55.10/index.md index 7f9839c2b5..2b5ce75cf0 100644 --- a/docs/my-website/release_notes/v1.55.10/index.md +++ b/docs/my-website/release_notes/v1.55.10/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.55.8-stable/index.md b/docs/my-website/release_notes/v1.55.8-stable/index.md index 7e82e94747..38c78eb537 100644 --- a/docs/my-website/release_notes/v1.55.8-stable/index.md +++ b/docs/my-website/release_notes/v1.55.8-stable/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: 
https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.56.1/index.md b/docs/my-website/release_notes/v1.56.1/index.md index 7c4ccc74ea..74f3606b90 100644 --- a/docs/my-website/release_notes/v1.56.1/index.md +++ b/docs/my-website/release_notes/v1.56.1/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.56.3/index.md b/docs/my-website/release_notes/v1.56.3/index.md index 95205633ea..3d996ba5b8 100644 --- a/docs/my-website/release_notes/v1.56.3/index.md +++ b/docs/my-website/release_notes/v1.56.3/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ 
diff --git a/docs/my-website/release_notes/v1.56.4/index.md b/docs/my-website/release_notes/v1.56.4/index.md index 93f8725632..bf9cc2d94e 100644 --- a/docs/my-website/release_notes/v1.56.4/index.md +++ b/docs/my-website/release_notes/v1.56.4/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.57.3/index.md b/docs/my-website/release_notes/v1.57.3/index.md index 3bee71a8e1..ab1154a0a8 100644 --- a/docs/my-website/release_notes/v1.57.3/index.md +++ b/docs/my-website/release_notes/v1.57.3/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.57.7/index.md b/docs/my-website/release_notes/v1.57.7/index.md index ce987baf77..4da2402efa 100644 --- a/docs/my-website/release_notes/v1.57.7/index.md +++ b/docs/my-website/release_notes/v1.57.7/index.md @@ -6,7 +6,7 @@ authors: - 
name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.57.8-stable/index.md b/docs/my-website/release_notes/v1.57.8-stable/index.md index d37a7b9ff8..56eb09a20f 100644 --- a/docs/my-website/release_notes/v1.57.8-stable/index.md +++ b/docs/my-website/release_notes/v1.57.8-stable/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.59.0/index.md b/docs/my-website/release_notes/v1.59.0/index.md index 5343ba49ad..2699e42020 100644 --- a/docs/my-website/release_notes/v1.59.0/index.md +++ b/docs/my-website/release_notes/v1.59.0/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: 
https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.59.8-stable/index.md b/docs/my-website/release_notes/v1.59.8-stable/index.md index fa9825fb66..023f284ad5 100644 --- a/docs/my-website/release_notes/v1.59.8-stable/index.md +++ b/docs/my-website/release_notes/v1.59.8-stable/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.61.20-stable/index.md b/docs/my-website/release_notes/v1.61.20-stable/index.md index 132c1aa318..5012e2aa90 100644 --- a/docs/my-website/release_notes/v1.61.20-stable/index.md +++ b/docs/my-website/release_notes/v1.61.20-stable/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: 
https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.63.0/index.md b/docs/my-website/release_notes/v1.63.0/index.md index e74a2f9b86..ab74b11b4d 100644 --- a/docs/my-website/release_notes/v1.63.0/index.md +++ b/docs/my-website/release_notes/v1.63.0/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.63.11-stable/index.md b/docs/my-website/release_notes/v1.63.11-stable/index.md index 91336fc681..882747a07b 100644 --- a/docs/my-website/release_notes/v1.63.11-stable/index.md +++ b/docs/my-website/release_notes/v1.63.11-stable/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: 
https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.63.14/index.md b/docs/my-website/release_notes/v1.63.14/index.md index aaeac639d4..ff2630468c 100644 --- a/docs/my-website/release_notes/v1.63.14/index.md +++ b/docs/my-website/release_notes/v1.63.14/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.63.2-stable/index.md b/docs/my-website/release_notes/v1.63.2-stable/index.md index 0c359452dc..3d47e02ac1 100644 --- a/docs/my-website/release_notes/v1.63.2-stable/index.md +++ b/docs/my-website/release_notes/v1.63.2-stable/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.65.0-stable/index.md b/docs/my-website/release_notes/v1.65.0-stable/index.md index ec5f3bd441..3696f5023c 100644 --- 
a/docs/my-website/release_notes/v1.65.0-stable/index.md +++ b/docs/my-website/release_notes/v1.65.0-stable/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.65.0/index.md b/docs/my-website/release_notes/v1.65.0/index.md index 46525ea55f..84276c997d 100644 --- a/docs/my-website/release_notes/v1.65.0/index.md +++ b/docs/my-website/release_notes/v1.65.0/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ diff --git a/docs/my-website/release_notes/v1.65.4-stable/index.md b/docs/my-website/release_notes/v1.65.4-stable/index.md index 27921c08f0..b219c5e1f3 100644 --- a/docs/my-website/release_notes/v1.65.4-stable/index.md +++ b/docs/my-website/release_notes/v1.65.4-stable/index.md @@ -6,7 +6,7 @@ authors: - name: Krrish Dholakia title: CEO, LiteLLM url: https://www.linkedin.com/in/krish-d/ - image_url: 
https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI + image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8 - name: Ishaan Jaffer title: CTO, LiteLLM url: https://www.linkedin.com/in/reffajnaahsi/ @@ -39,4 +39,102 @@ ghcr.io/berriai/litellm:main-v1.65.4-stable pip install litellm==1.65.4.post1 ``` - \ No newline at end of file + + +## New Models / Updated Models +1. Databricks - claude-3-7-sonnet cost tracking [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L10350) +2. VertexAI - `gemini-2.5-pro-exp-03-25` cost tracking [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L4492) +3. VertexAI - `gemini-2.0-flash` cost tracking [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L4689) +4. Groq - add whisper ASR models to model cost map [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L3324) +5. IBM - Add watsonx/ibm/granite-3-8b-instruct to model cost map [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L91) +6. Google AI Studio - add gemini/gemini-2.5-pro-preview-03-25 to model cost map [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L4850) + +## LLM Translation +1. Vertex AI - Support anyOf param for OpenAI json schema translation [Get Started](https://docs.litellm.ai/docs/providers/vertex#json-schema) +2. 
Anthropic- response_format + thinking param support (works across Anthropic API, Bedrock, Vertex) [Get Started](https://docs.litellm.ai/docs/reasoning_content) +3. Anthropic - if thinking token is specified and max tokens is not - ensure max token to anthropic is higher than thinking tokens (works across Anthropic API, Bedrock, Vertex) [PR](https://github.com/BerriAI/litellm/pull/9594) +4. Bedrock - latency optimized inference support [Get Started](https://docs.litellm.ai/docs/providers/bedrock#usage---latency-optimized-inference) +5. Sagemaker - handle special tokens + multibyte character code in response [Get Started](https://docs.litellm.ai/docs/providers/aws_sagemaker) +6. MCP - add support for using SSE MCP servers [Get Started](https://docs.litellm.ai/docs/mcp#usage) +8. Anthropic - new `litellm.messages.create` interface for calling Anthropic `/v1/messages` via passthrough [Get Started](https://docs.litellm.ai/docs/anthropic_unified#usage) +11. Anthropic - support ‘file’ content type in message param (works across Anthropic API, Bedrock, Vertex) [Get Started](https://docs.litellm.ai/docs/providers/anthropic#usage---pdf) +12. Anthropic - map openai 'reasoning_effort' to anthropic 'thinking' param (works across Anthropic API, Bedrock, Vertex) [Get Started](https://docs.litellm.ai/docs/providers/anthropic#usage---thinking--reasoning_content) +13. Google AI Studio (Gemini) - [BETA] `/v1/files` upload support [Get Started](../../docs/providers/google_ai_studio/files) +14. Azure - fix o-series tool calling [Get Started](../../docs/providers/azure#tool-calling--function-calling) +15. Unified file id - [ALPHA] allow calling multiple providers with same file id [PR](https://github.com/BerriAI/litellm/pull/9718) + - This is experimental, and not recommended for production use. + - We plan to have a production-ready implementation by next week. +16. Google AI Studio (Gemini) - return logprobs [PR](https://github.com/BerriAI/litellm/pull/9713) +17. 
Anthropic - Support prompt caching for Anthropic tool calls [Get Started](https://docs.litellm.ai/docs/completion/prompt_caching) +18. OpenRouter - unwrap extra body on open router calls [PR](https://github.com/BerriAI/litellm/pull/9747) +19. VertexAI - fix credential caching issue [PR](https://github.com/BerriAI/litellm/pull/9756) +20. XAI - filter out 'name' param for XAI [PR](https://github.com/BerriAI/litellm/pull/9761) +21. Gemini - image generation output support [Get Started](../../docs/providers/gemini#image-generation) +22. Databricks - support claude-3-7-sonnet w/ thinking + response_format [Get Started](../../docs/providers/databricks#usage---thinking--reasoning_content) + +## Spend Tracking Improvements +1. Reliability fix - Check sent and received model for cost calculation [PR](https://github.com/BerriAI/litellm/pull/9669) +2. Vertex AI - Multimodal embedding cost tracking [Get Started](https://docs.litellm.ai/docs/providers/vertex#multi-modal-embeddings), [PR](https://github.com/BerriAI/litellm/pull/9623) + +## Management Endpoints / UI + + + +1. New Usage Tab + - Report 'total_tokens' + report success/failure calls + - Remove double bars on scroll + - Ensure ‘daily spend’ chart ordered from earliest to latest date + - showing spend per model per day + - show key alias on usage tab + - Allow non-admins to view their activity + - Add date picker to new usage tab +2. Virtual Keys Tab + - remove 'default key' on user signup + - fix showing user models available for personal key creation +3. Test Key Tab + - Allow testing image generation models +4. Models Tab + - Fix bulk adding models + - support reusable credentials for passthrough endpoints + - Allow team members to see team models +5. Teams Tab + - Fix json serialization error on update team metadata +6. Request Logs Tab + - Add reasoning_content token tracking across all providers on streaming +7. 
API + - return key alias on /user/daily/activity [Get Started](../../docs/proxy/cost_tracking#daily-spend-breakdown-api) +8. SSO + - Allow assigning SSO users to teams on MSFT SSO [PR](https://github.com/BerriAI/litellm/pull/9745) + +## Logging / Guardrail Integrations + +1. Console Logs - Add json formatting for uncaught exceptions [PR](https://github.com/BerriAI/litellm/pull/9619) +2. Guardrails - AIM Guardrails support for virtual key based policies [Get Started](../../docs/proxy/guardrails/aim_security) +3. Logging - fix completion start time tracking [PR](https://github.com/BerriAI/litellm/pull/9688) +4. Prometheus + - Allow adding authentication on Prometheus /metrics endpoints [PR](https://github.com/BerriAI/litellm/pull/9766) + - Distinguish LLM Provider Exception vs. LiteLLM Exception in metric naming [PR](https://github.com/BerriAI/litellm/pull/9760) + - Emit operational metrics for new DB Transaction architecture [PR](https://github.com/BerriAI/litellm/pull/9719) + +## Performance / Loadbalancing / Reliability improvements +1. Preventing Deadlocks + - Reduce DB Deadlocks by storing spend updates in Redis and then committing to DB [PR](https://github.com/BerriAI/litellm/pull/9608) + - Ensure no deadlocks occur when updating DailyUserSpendTransaction [PR](https://github.com/BerriAI/litellm/pull/9690) + - High Traffic fix - ensure new DB + Redis architecture accurately tracks spend [PR](https://github.com/BerriAI/litellm/pull/9673) + - Use Redis for PodLock Manager instead of PG (ensures no deadlocks occur) [PR](https://github.com/BerriAI/litellm/pull/9715) + - v2 DB Deadlock Reduction Architecture – Add Max Size for In-Memory Queue + Backpressure Mechanism [PR](https://github.com/BerriAI/litellm/pull/9759) + +2. Prisma Migrations [Get Started](../../docs/proxy/prod#9-use-prisma-migrate-deploy) + - connects litellm proxy to litellm's prisma migration files + - Handle db schema updates from new `litellm-proxy-extras` sdk +3. 
Redis - support password for sync sentinel clients [PR](https://github.com/BerriAI/litellm/pull/9622) +4. Fix "Circular reference detected" error when max_parallel_requests = 0 [PR](https://github.com/BerriAI/litellm/pull/9671) +5. Code QA - Ban hardcoded numbers [PR](https://github.com/BerriAI/litellm/pull/9709) + +## Helm +1. fix: wrong indentation of ttlSecondsAfterFinished in chart [PR](https://github.com/BerriAI/litellm/pull/9611) + +## General Proxy Improvements +1. Fix - only apply service_account_settings.enforced_params on service accounts [PR](https://github.com/BerriAI/litellm/pull/9683) +2. Fix - handle metadata null on `/chat/completion` [PR](https://github.com/BerriAI/litellm/issues/9717) +3. Fix - Move daily user transaction logging outside of 'disable_spend_logs' flag, as they’re unrelated [PR](https://github.com/BerriAI/litellm/pull/9772) diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 1cb3e02707..b8591cb993 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -188,7 +188,15 @@ const sidebars = { "providers/azure_ai", "providers/aiml", "providers/vertex", - "providers/gemini", + + { + type: "category", + label: "Google AI Studio", + items: [ + "providers/gemini", + "providers/google_ai_studio/files", + ] + }, "providers/anthropic", "providers/aws_sagemaker", "providers/bedrock", diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html deleted file mode 100644 index 31aa80b7d4..0000000000 --- a/litellm/proxy/_experimental/out/onboarding.html +++ /dev/null @@ -1 +0,0 @@ -LiteLLM Dashboard \ No newline at end of file diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 8595b46ddc..a3131f563c 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -29,6 +29,10 @@ model_list: model: databricks/databricks-claude-3-7-sonnet api_key: os.environ/DATABRICKS_API_KEY 
api_base: os.environ/DATABRICKS_API_BASE + - model_name: "gemini/gemini-2.0-flash" + litellm_params: + model: gemini/gemini-2.0-flash + api_key: os.environ/GEMINI_API_KEY litellm_settings: num_retries: 0