Merge branch 'main' into litellm_bedrock_converse_api

Krish Dholakia 2024-06-07 08:49:52 -07:00 committed by GitHub
commit 26993c067e
82 changed files with 2540 additions and 1147 deletions


@ -344,4 +344,4 @@ workflows:
filters:
  branches:
    only:
      - main


@ -225,37 +225,37 @@ curl 'http://0.0.0.0:4000/key/generate' \
## Supported Providers ([Docs](https://docs.litellm.ai/docs/providers))
| Provider | [Completion](https://docs.litellm.ai/docs/#basic-usage) | [Streaming](https://docs.litellm.ai/docs/completion/stream#streaming-responses) | [Async Completion](https://docs.litellm.ai/docs/completion/stream#async-completion) | [Async Streaming](https://docs.litellm.ai/docs/completion/stream#async-streaming) | [Async Embedding](https://docs.litellm.ai/docs/embedding/supported_embedding) | [Async Image Generation](https://docs.litellm.ai/docs/image_generation) |
|----------|------------|-----------|------------------|-----------------|-----------------|------------------------|
| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [google - vertex_ai](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ | | |
| [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | ✅ | | |
| [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ | | |
| [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ | | |
| [huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ | | |
| [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | ✅ | | |
| [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | ✅ | | |
| [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ | | |
| [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ | | |
| [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ | | |
| [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ | | |
| [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ | | |
| [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ | | |
| [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ | | |
| [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ | | |
| [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ | | |
| [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ | | |
| [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ | | |
| [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ | |
| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ | |
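As a quick, hedged illustration of the async + streaming columns above, here is a minimal LiteLLM SDK sketch (the `gpt-3.5-turbo` model and the `OPENAI_API_KEY` value are placeholder assumptions; any provider with those columns checked works the same way):
```python
import asyncio
import os

from litellm import acompletion

os.environ["OPENAI_API_KEY"] = "your-api-key"  # placeholder

async def main():
    # async completion + async streaming, per the table columns above
    response = await acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello"}],
        stream=True,
    )
    async for chunk in response:
        print(chunk)

asyncio.run(main())
```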
[**Read the Docs**](https://docs.litellm.ai/docs/)


@ -10,6 +10,7 @@ For companies that need SSO, user management and professional support for LiteLL
This covers:
- ✅ **Features under the [LiteLLM Commercial License (Content Mod, Custom Tags, etc.)](https://docs.litellm.ai/docs/proxy/enterprise)**
- ✅ [**Secure UI access with Single Sign-On**](../docs/proxy/ui.md#setup-ssoauth-for-ui)
- ✅ [**Audit Logs with retention policy**](../docs/proxy/enterprise.md#audit-logs)
- ✅ [**JWT-Auth**](../docs/proxy/token_auth.md)
- ✅ [**Prompt Injection Detection**](#prompt-injection-detection-lakeraai)
- ✅ [**Invite Team Members to access `/spend` Routes**](../docs/proxy/cost_tracking#allowing-non-proxy-admins-to-access-spend-endpoints)


@ -38,7 +38,7 @@ class MyCustomHandler(CustomLogger):
print(f"On Async Success") print(f"On Async Success")
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Async Success") print(f"On Async Failure")
customHandler = MyCustomHandler() customHandler = MyCustomHandler()


@ -144,6 +144,26 @@ print(response)
```
You can also pass `metadata` as part of the request header with a `langfuse_*` prefix:
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--header 'langfuse_trace_id: trace-id22' \
--header 'langfuse_trace_user_id: user-id2' \
--header 'langfuse_trace_metadata: {"key":"value"}' \
--data '{
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "user",
"content": "what llm are you"
}
]
}'
```
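For reference, here is a rough Python equivalent of the curl above, sending the same `langfuse_*` values as request headers through the OpenAI client's `extra_headers` parameter (the proxy URL and key are the same placeholder values used elsewhere in these docs):
```python
import openai

client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what llm are you"}],
    # same langfuse_* metadata as the curl example, passed as request headers
    extra_headers={
        "langfuse_trace_id": "trace-id22",
        "langfuse_trace_user_id": "user-id2",
        "langfuse_trace_metadata": '{"key":"value"}',
    },
)
print(response)
```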
### Trace & Generation Parameters ### Trace & Generation Parameters
#### Trace Specific Parameters #### Trace Specific Parameters


@ -0,0 +1,3 @@
llmcord.py lets you and your friends chat with LLMs directly in your Discord server. It works with practically any LLM, remote or locally hosted.
GitHub: https://github.com/jakobdylanc/discord-llm-chatbot


@ -46,13 +46,13 @@ for chunk in response:
## Supported Models - ALL Groq Models Supported!
We support ALL Groq models, just set `groq/` as a prefix when sending completion requests
| Model Name | Function Call |
|--------------------|---------------------------------------------------------|
| llama3-8b-8192 | `completion(model="groq/llama3-8b-8192", messages)` |
| llama3-70b-8192 | `completion(model="groq/llama3-70b-8192", messages)` |
| llama2-70b-4096 | `completion(model="groq/llama2-70b-4096", messages)` |
| mixtral-8x7b-32768 | `completion(model="groq/mixtral-8x7b-32768", messages)` |
| gemma-7b-it | `completion(model="groq/gemma-7b-it", messages)` |
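For example, a minimal SDK call using one of the models above (assumes a valid `GROQ_API_KEY`; the key shown is a placeholder):
```python
import os

from litellm import completion

os.environ["GROQ_API_KEY"] = "your-groq-api-key"  # placeholder

response = completion(
    model="groq/llama3-8b-8192",  # any model from the table, prefixed with `groq/`
    messages=[{"role": "user", "content": "Hello from LiteLLM"}],
)
print(response)
```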
## Groq - Tool / Function Calling Example


@ -26,52 +26,52 @@ Example TogetherAI Usage - Note: liteLLM supports all models deployed on Togethe
### Llama LLMs - Chat
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|--------------------------------------------------------------------------|-------------------------------------|
| togethercomputer/llama-2-70b-chat | `completion('together_ai/togethercomputer/llama-2-70b-chat', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Llama LLMs - Language / Instruct
| Model Name | Function Call | Required OS Variables |
|------------------------------------------|---------------------------------------------------------------------------------|-------------------------------------|
| togethercomputer/llama-2-70b | `completion('together_ai/togethercomputer/llama-2-70b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/LLaMA-2-7B-32K | `completion('together_ai/togethercomputer/LLaMA-2-7B-32K', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/Llama-2-7B-32K-Instruct | `completion('together_ai/togethercomputer/Llama-2-7B-32K-Instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/llama-2-7b | `completion('together_ai/togethercomputer/llama-2-7b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Falcon LLMs
| Model Name | Function Call | Required OS Variables |
|---------------------------------------|------------------------------------------------------------------------------|-------------------------------------|
| togethercomputer/falcon-40b-instruct | `completion('together_ai/togethercomputer/falcon-40b-instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/falcon-7b-instruct | `completion('together_ai/togethercomputer/falcon-7b-instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Alpaca LLMs
| Model Name | Function Call | Required OS Variables |
|-----------------------------|--------------------------------------------------------------------|-------------------------------------|
| togethercomputer/alpaca-7b | `completion('together_ai/togethercomputer/alpaca-7b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Other Chat LLMs
| Model Name | Function Call | Required OS Variables |
|-------------------------------|----------------------------------------------------------------------|-------------------------------------|
| HuggingFaceH4/starchat-alpha | `completion('together_ai/HuggingFaceH4/starchat-alpha', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Code LLMs
| Model Name | Function Call | Required OS Variables |
|------------------------------------------|----------------------------------------------------------------------------------|-------------------------------------|
| togethercomputer/CodeLlama-34b | `completion('together_ai/togethercomputer/CodeLlama-34b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/CodeLlama-34b-Instruct | `completion('together_ai/togethercomputer/CodeLlama-34b-Instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/CodeLlama-34b-Python | `completion('together_ai/togethercomputer/CodeLlama-34b-Python', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| defog/sqlcoder | `completion('together_ai/defog/sqlcoder', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| NumbersStation/nsql-llama-2-7B | `completion('together_ai/NumbersStation/nsql-llama-2-7B', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| WizardLM/WizardCoder-15B-V1.0 | `completion('together_ai/WizardLM/WizardCoder-15B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| WizardLM/WizardCoder-Python-34B-V1.0 | `completion('together_ai/WizardLM/WizardCoder-Python-34B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Language LLMs
| Model Name | Function Call | Required OS Variables |
|--------------------------------------|-----------------------------------------------------------------------------|-------------------------------------|
| NousResearch/Nous-Hermes-Llama2-13b | `completion('together_ai/NousResearch/Nous-Hermes-Llama2-13b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Austism/chronos-hermes-13b | `completion('together_ai/Austism/chronos-hermes-13b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| upstage/SOLAR-0-70b-16bit | `completion('together_ai/upstage/SOLAR-0-70b-16bit', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| WizardLM/WizardLM-70B-V1.0 | `completion('together_ai/WizardLM/WizardLM-70B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
## Prompt Templates


@ -155,14 +155,14 @@ def default_pt(messages):
#### Models we already have Prompt Templates for
| Model Name | Works for Models | Function Call |
|---------------------------------------|------------------------------------|--------------------------------------------------------------------------------------------------------------------|
| meta-llama/Llama-2-7b-chat | All meta-llama llama2 chat models | `completion(model='vllm/meta-llama/Llama-2-7b', messages=messages, api_base="your_api_endpoint")` |
| tiiuae/falcon-7b-instruct | All falcon instruct models | `completion(model='vllm/tiiuae/falcon-7b-instruct', messages=messages, api_base="your_api_endpoint")` |
| mosaicml/mpt-7b-chat | All mpt chat models | `completion(model='vllm/mosaicml/mpt-7b-chat', messages=messages, api_base="your_api_endpoint")` |
| codellama/CodeLlama-34b-Instruct-hf | All codellama instruct models | `completion(model='vllm/codellama/CodeLlama-34b-Instruct-hf', messages=messages, api_base="your_api_endpoint")` |
| WizardLM/WizardCoder-Python-34B-V1.0 | All wizardcoder models | `completion(model='vllm/WizardLM/WizardCoder-Python-34B-V1.0', messages=messages, api_base="your_api_endpoint")` |
| Phind/Phind-CodeLlama-34B-v2 | All phind-codellama models | `completion(model='vllm/Phind/Phind-CodeLlama-34B-v2', messages=messages, api_base="your_api_endpoint")` |
#### Custom prompt templates


@ -251,23 +251,23 @@ response = completion(
Here are some examples of models available in IBM watsonx.ai that you can use with LiteLLM:
| Model Name | Command |
|------------------------------------|------------------------------------------------------------------------------------------|
| Flan T5 XXL | `completion(model=watsonx/google/flan-t5-xxl, messages=messages)` |
| Flan Ul2 | `completion(model=watsonx/google/flan-ul2, messages=messages)` |
| Mt0 XXL | `completion(model=watsonx/bigscience/mt0-xxl, messages=messages)` |
| Gpt Neox | `completion(model=watsonx/eleutherai/gpt-neox-20b, messages=messages)` |
| Mpt 7B Instruct2 | `completion(model=watsonx/ibm/mpt-7b-instruct2, messages=messages)` |
| Starcoder | `completion(model=watsonx/bigcode/starcoder, messages=messages)` |
| Llama 2 70B Chat | `completion(model=watsonx/meta-llama/llama-2-70b-chat, messages=messages)` |
| Llama 2 13B Chat | `completion(model=watsonx/meta-llama/llama-2-13b-chat, messages=messages)` |
| Granite 13B Instruct | `completion(model=watsonx/ibm/granite-13b-instruct-v1, messages=messages)` |
| Granite 13B Chat | `completion(model=watsonx/ibm/granite-13b-chat-v1, messages=messages)` |
| Flan T5 XL | `completion(model=watsonx/google/flan-t5-xl, messages=messages)` |
| Granite 13B Chat V2 | `completion(model=watsonx/ibm/granite-13b-chat-v2, messages=messages)` |
| Granite 13B Instruct V2 | `completion(model=watsonx/ibm/granite-13b-instruct-v2, messages=messages)` |
| Elyza Japanese Llama 2 7B Instruct | `completion(model=watsonx/elyza/elyza-japanese-llama-2-7b-instruct, messages=messages)` |
| Mixtral 8X7B Instruct V01 Q | `completion(model=watsonx/ibm-mistralai/mixtral-8x7b-instruct-v01-q, messages=messages)` |
For a list of all available models in watsonx.ai, see [here](https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/fm-models.html?context=wx&locale=en&audience=wdp).
@ -275,10 +275,10 @@ For a list of all available models in watsonx.ai, see [here](https://dataplatfor
## Supported IBM watsonx.ai Embedding Models
| Model Name | Function Call |
|------------|------------------------------------------------------------------------|
| Slate 30m | `embedding(model="watsonx/ibm/slate-30m-english-rtrvr", input=input)` |
| Slate 125m | `embedding(model="watsonx/ibm/slate-125m-english-rtrvr", input=input)` |
For a list of all available embedding models in watsonx.ai, see [here](https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/fm-models-embed.html?context=wx).


@ -37,26 +37,26 @@ print(response)
## Supported Models
All models listed here https://inference.readthedocs.io/en/latest/models/builtin/embedding/index.html are supported
| Model Name | Function Call |
|------------------------------|---------------------------------------------------------------------|
| bge-base-en | `embedding(model="xinference/bge-base-en", input)` |
| bge-base-en-v1.5 | `embedding(model="xinference/bge-base-en-v1.5", input)` |
| bge-base-zh | `embedding(model="xinference/bge-base-zh", input)` |
| bge-base-zh-v1.5 | `embedding(model="xinference/bge-base-zh-v1.5", input)` |
| bge-large-en | `embedding(model="xinference/bge-large-en", input)` |
| bge-large-en-v1.5 | `embedding(model="xinference/bge-large-en-v1.5", input)` |
| bge-large-zh | `embedding(model="xinference/bge-large-zh", input)` |
| bge-large-zh-noinstruct | `embedding(model="xinference/bge-large-zh-noinstruct", input)` |
| bge-large-zh-v1.5 | `embedding(model="xinference/bge-large-zh-v1.5", input)` |
| bge-small-en-v1.5 | `embedding(model="xinference/bge-small-en-v1.5", input)` |
| bge-small-zh | `embedding(model="xinference/bge-small-zh", input)` |
| bge-small-zh-v1.5 | `embedding(model="xinference/bge-small-zh-v1.5", input)` |
| e5-large-v2 | `embedding(model="xinference/e5-large-v2", input)` |
| gte-base | `embedding(model="xinference/gte-base", input)` |
| gte-large | `embedding(model="xinference/gte-large", input)` |
| jina-embeddings-v2-base-en | `embedding(model="xinference/jina-embeddings-v2-base-en", input)` |
| jina-embeddings-v2-small-en | `embedding(model="xinference/jina-embeddings-v2-small-en", input)` |
| multilingual-e5-large | `embedding(model="xinference/multilingual-e5-large", input)` |


@ -260,7 +260,7 @@ Requirements:
<TabItem value="docker-deploy" label="Dockerfile"> <TabItem value="docker-deploy" label="Dockerfile">
We maintain a [seperate Dockerfile](https://github.com/BerriAI/litellm/pkgs/container/litellm-database) for reducing build time when running LiteLLM proxy with a connected Postgres Database We maintain a [separate Dockerfile](https://github.com/BerriAI/litellm/pkgs/container/litellm-database) for reducing build time when running LiteLLM proxy with a connected Postgres Database
```shell ```shell
docker pull ghcr.io/berriai/litellm-database:main-latest docker pull ghcr.io/berriai/litellm-database:main-latest


@ -2,30 +2,213 @@ import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# ✨ Enterprise Features - SSO, Audit Logs, Guardrails
:::tip
Get in touch with us [here](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
:::
Features:
- ✅ [SSO for Admin UI](./ui.md#✨-enterprise-features)
- ✅ [Audit Logs](#audit-logs)
- ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags)
- ✅ [Content Moderation with LLM Guard, LlamaGuard, Google Text Moderations](#content-moderation)
- ✅ [Prompt Injection Detection (with LakeraAI API)](#prompt-injection-detection---lakeraai)
- ✅ [Custom Branding + Routes on Swagger Docs](#swagger-docs---custom-routes--branding)
- ✅ Reject calls from Blocked User list
- ✅ Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors)
- ✅ Don't log/store specific requests to Langfuse, Sentry, etc. (eg confidential LLM requests)
## Audit Logs
Store audit logs for **Create, Update, Delete Operations** done on `Teams` and `Virtual Keys`
**Step 1** Switch on audit logs
```yaml
litellm_settings:
  store_audit_logs: true
```
Start the litellm proxy with this config
**Step 2** Test it - Create a Team
```shell
curl --location 'http://0.0.0.0:4000/team/new' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"max_budget": 2
}'
```
**Step 3** Expected Log
```json
{
  "id": "e1760e10-4264-4499-82cd-c08c86c8d05b",
  "updated_at": "2024-06-06T02:10:40.836420+00:00",
  "changed_by": "109010464461339474872",
  "action": "created",
  "table_name": "LiteLLM_TeamTable",
  "object_id": "82e725b5-053f-459d-9a52-867191635446",
  "before_value": null,
  "updated_values": {
    "team_id": "82e725b5-053f-459d-9a52-867191635446",
    "admins": [],
    "members": [],
    "members_with_roles": [
      {
        "role": "admin",
        "user_id": "109010464461339474872"
      }
    ],
    "max_budget": 2.0,
    "models": [],
    "blocked": false
  }
}
```
## Tracking Spend for Custom Tags
Requirements:
- Virtual Keys & a database should be set up, see [virtual keys](https://docs.litellm.ai/docs/proxy/virtual_keys)
#### Usage - /chat/completions requests with request tags
<Tabs>
<TabItem value="openai" label="OpenAI Python v1.0.0+">
Set `extra_body={"metadata": {}}` to the `metadata` you want to pass
```python
import openai

client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={
        "metadata": {
            "tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"]
        }
    }
)
print(response)
```
</TabItem>
<TabItem value="Curl" label="Curl Request">
Pass `metadata` as part of the request body
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "user",
"content": "what llm are you"
}
],
"metadata": {"tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"]}
}'
```
</TabItem>
<TabItem value="langchain" label="Langchain">
```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage

chat = ChatOpenAI(
    openai_api_base="http://0.0.0.0:4000",
    model="gpt-3.5-turbo",
    temperature=0.1,
    extra_body={
        "metadata": {
            "tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"]
        }
    }
)
messages = [
    SystemMessage(
        content="You are a helpful assistant that im using to make a test request to."
    ),
    HumanMessage(
        content="test from litellm. tell me why it's amazing in 1 sentence"
    ),
]
response = chat(messages)
print(response)
```
</TabItem>
</Tabs>
#### Viewing Spend per tag
#### `/spend/tags` Request Format
```shell
curl -X GET "http://0.0.0.0:4000/spend/tags" \
-H "Authorization: Bearer sk-1234"
```
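The same call from Python, as a small sketch with the `requests` library (host and key are the placeholder values from the curl above):
```python
import requests

response = requests.get(
    "http://0.0.0.0:4000/spend/tags",
    headers={"Authorization": "Bearer sk-1234"},
)
# each entry has individual_request_tag, log_count, total_spend
print(response.json())
```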
#### `/spend/tags` Response Format
```json
[
{
"individual_request_tag": "model-anthropic-claude-v2.1",
"log_count": 6,
"total_spend": 0.000672
},
{
"individual_request_tag": "app-ishaan-local",
"log_count": 4,
"total_spend": 0.000448
},
{
"individual_request_tag": "app-ishaan-prod",
"log_count": 2,
"total_spend": 0.000224
}
]
```
## Content Moderation
#### Content Moderation with LLM Guard
Set the LLM Guard API Base in your environment
@ -160,7 +343,7 @@ curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
</TabItem> </TabItem>
</Tabs> </Tabs>
### Content Moderation with LlamaGuard #### Content Moderation with LlamaGuard
Currently works with Sagemaker's LlamaGuard endpoint. Currently works with Sagemaker's LlamaGuard endpoint.
@ -194,7 +377,7 @@ callbacks: ["llamaguard_moderations"]
#### Content Moderation with Google Text Moderation
Requires your GOOGLE_APPLICATION_CREDENTIALS to be set in your .env (same as VertexAI).
@ -250,7 +433,7 @@ Here are the category specific values:
#### Content Moderation with OpenAI Moderations
Use this if you want to reject /chat, /completions, /embeddings calls that fail OpenAI Moderations checks
@ -276,7 +459,7 @@ Step 1 Set a `LAKERA_API_KEY` in your env
LAKERA_API_KEY="7a91a1a6059da*******"
```
Step 2. Add `lakera_prompt_injection` to your callbacks
```yaml
litellm_settings:
@ -302,6 +485,42 @@ curl --location 'http://localhost:4000/chat/completions' \
}'
```
## Swagger Docs - Custom Routes + Branding
:::info
Requires a LiteLLM Enterprise key to use. Get a free 2-week license [here](https://forms.gle/sTDVprBs18M4V8Le8)
:::
Set LiteLLM Key in your environment
```bash
LITELLM_LICENSE=""
```
#### Customize Title + Description
In your environment, set:
```bash
DOCS_TITLE="TotalGPT"
DOCS_DESCRIPTION="Sample Company Description"
```
#### Customize Routes
Hide admin routes from users.
In your environment, set:
```bash
DOCS_FILTERED="True" # only shows openai routes to user
```
<Image img={require('../../img/custom_swagger.png')} style={{ width: '900px', height: 'auto' }} />
## Enable Blocked User Lists
If any call is made to the proxy with this user id, it'll be rejected - use this if you want to let users opt out of AI features
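As a rough sketch of what a rejected call looks like (assuming the proxy checks the OpenAI `user` field against the configured block list; the user id and key below are placeholders):
```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

try:
    client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello"}],
        user="user_id_1",  # hypothetical blocked user id
    )
except openai.APIError as e:
    print("request rejected for blocked user:", e)
```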
@ -417,176 +636,6 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
}
'
```
## Public Model Hub


@ -41,7 +41,9 @@ litellm_settings:
**Step 3**: Set required env variables for logging to langfuse
```shell
export LANGFUSE_PUBLIC_KEY="pk_kk"
export LANGFUSE_SECRET_KEY="sk_ss"
# Optional, defaults to https://cloud.langfuse.com
export LANGFUSE_HOST="https://xxx.langfuse.com"
```
**Step 4**: Start the proxy, make a test request
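For example, a minimal test request against the proxy (assumes it is running on `http://0.0.0.0:4000` with a virtual key such as `sk-1234`; the request should then show up in Langfuse with the keys configured above):
```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "test langfuse logging"}],
)
print(response)
```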


@ -100,4 +100,76 @@ print(response)
```
</TabItem>
</Tabs>
## Advanced - Redis Caching
Use redis caching to do request prioritization across multiple instances of LiteLLM.
### SDK
```python
import os

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "mock_response": "Hello world this is Macintosh!",  # fakes the LLM API call
                "rpm": 1,
            },
        },
    ],
    ### REDIS PARAMS ###
    redis_host=os.environ["REDIS_HOST"],
    redis_password=os.environ["REDIS_PASSWORD"],
    redis_port=os.environ["REDIS_PORT"],
)

# run this inside an async function / event loop
try:
    _response = await router.schedule_acompletion(  # 👈 ADDS TO QUEUE + POLLS + MAKES CALL
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hey!"}],
        priority=0,  # 👈 LOWER IS BETTER
    )
except Exception as e:
    print("didn't make request")
```
### PROXY
```yaml
model_list:
  - model_name: gpt-3.5-turbo-fake-model
    litellm_params:
      model: gpt-3.5-turbo
      mock_response: "hello world!"
      api_key: my-good-key

router_settings:
  redis_host: os.environ/REDIS_HOST
  redis_password: os.environ/REDIS_PASSWORD
  redis_port: os.environ/REDIS_PORT
```
```bash
$ litellm --config /path/to/config.yaml
# RUNNING on http://0.0.0.0:4000
```
```bash
curl -X POST 'http://localhost:4000/queue/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "gpt-3.5-turbo-fake-model",
"messages": [
{
"role": "user",
"content": "what is the meaning of the universe? 1234"
}],
"priority": 0 👈 SET VALUE HERE
}'
```
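The same request from Python, as a hedged sketch with the `requests` library (same placeholder key, model name, and priority value as the curl above):
```python
import requests

response = requests.post(
    "http://localhost:4000/queue/chat/completions",
    headers={"Authorization": "Bearer sk-1234"},
    json={
        "model": "gpt-3.5-turbo-fake-model",
        "messages": [
            {"role": "user", "content": "what is the meaning of the universe? 1234"}
        ],
        "priority": 0,  # lower is better
    },
)
print(response.json())
```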


@ -1,11 +1,31 @@
# Secret Manager
LiteLLM supports reading secrets from Azure Key Vault and Infisical
- AWS Key Management Service
- AWS Secret Manager
- [Azure Key Vault](#azure-key-vault)
- Google Key Management Service
- [Infisical Secret Manager](#infisical-secret-manager)
- [.env Files](#env-files)
## AWS Key Management Service
Use AWS KMS to store a hashed copy of your Proxy Master Key in the environment.
```bash
export LITELLM_MASTER_KEY="djZ9xjVaZ..." # 👈 ENCRYPTED KEY
export AWS_REGION_NAME="us-west-2"
```
```yaml
general_settings:
  key_management_system: "aws_kms"
  key_management_settings:
    hosted_keys: ["LITELLM_MASTER_KEY"]  # 👈 WHICH KEYS ARE STORED ON KMS
```
[**See Decryption Code**](https://github.com/BerriAI/litellm/blob/a2da2a8f168d45648b61279d4795d647d94f90c9/litellm/utils.py#L10182)
## AWS Secret Manager
Store your proxy keys in AWS Secret Manager.


@ -1,8 +1,8 @@
# Using Fine-Tuned gpt-3.5-turbo
LiteLLM allows you to call `completion` with your fine-tuned gpt-3.5-turbo models
If you're trying to create your custom fine-tuned gpt-3.5-turbo model, follow along with this tutorial: https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset
Once you've created your fine-tuned model, you can call it with `litellm.completion()`
## Usage
```python
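# a minimal sketch - replace the model id below with your own fine-tuned model's id
import os

from litellm import completion

os.environ["OPENAI_API_KEY"] = "your-openai-key"  # placeholder

response = completion(
    model="ft:gpt-3.5-turbo:my-org:custom_suffix:id",  # hypothetical fine-tuned model name
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(response)
```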


@ -5975,9 +5975,9 @@
}
},
"node_modules/caniuse-lite": {
"version": "1.0.30001629",
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001629.tgz",
"integrity": "sha512-c3dl911slnQhmxUIT4HhYzT7wnBK/XYpGnYLOj4nJBaRiw52Ibe7YxlDaAeRECvA786zCuExhxIUJ2K7nHMrBw==",
"funding": [
{
"type": "opencollective",


@ -36,6 +36,7 @@ const sidebars = {
label: "📖 All Endpoints (Swagger)", label: "📖 All Endpoints (Swagger)",
href: "https://litellm-api.up.railway.app/", href: "https://litellm-api.up.railway.app/",
}, },
"proxy/enterprise",
"proxy/demo", "proxy/demo",
"proxy/configs", "proxy/configs",
"proxy/reliability", "proxy/reliability",
@ -45,7 +46,6 @@ const sidebars = {
"proxy/customers", "proxy/customers",
"proxy/billing", "proxy/billing",
"proxy/user_keys", "proxy/user_keys",
"proxy/enterprise",
"proxy/virtual_keys", "proxy/virtual_keys",
"proxy/alerting", "proxy/alerting",
{ {

File diff suppressed because it is too large


@ -18,10 +18,6 @@ async def log_event(request: Request):
return {"message": "Request received successfully"} return {"message": "Request received successfully"}
except Exception as e: except Exception as e:
print(f"Error processing request: {str(e)}")
import traceback
traceback.print_exc()
raise HTTPException(status_code=500, detail="Internal Server Error") raise HTTPException(status_code=500, detail="Internal Server Error")


@ -120,6 +120,5 @@ class GenericAPILogger:
)
return response
except Exception as e:
verbose_logger.error(f"Generic - {str(e)}\n{traceback.format_exc()}")
pass


@ -82,7 +82,7 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
except HTTPException as e:
raise e
except Exception as e:
verbose_proxy_logger.error(traceback.format_exc())
async def async_post_call_success_hook(
self,


@ -118,4 +118,4 @@ class _ENTERPRISE_BlockedUserList(CustomLogger):
except HTTPException as e:
raise e
except Exception as e:
verbose_proxy_logger.error(traceback.format_exc())


@ -92,7 +92,7 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
},
)
except Exception as e:
verbose_proxy_logger.error(traceback.format_exc())
raise e
def should_proceed(self, user_api_key_dict: UserAPIKeyAuth, data: dict) -> bool:


@ -60,6 +60,7 @@ _async_failure_callback: List[Callable] = (
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
turn_off_message_logging: Optional[bool] = False
store_audit_logs = False  # Enterprise feature, allow users to see audit logs
## end of callbacks #############
email: Optional[str] = (
@ -808,6 +809,7 @@ from .exceptions import (
APIConnectionError,
APIResponseValidationError,
UnprocessableEntityError,
InternalServerError,
LITELLM_EXCEPTION_TYPES,
)
from .budget_manager import BudgetManager


@ -1,5 +1,6 @@
import logging, os, json
from logging import Formatter
import traceback
set_verbose = False
json_logs = bool(os.getenv("JSON_LOGS", False))


@ -253,7 +253,6 @@ class RedisCache(BaseCache):
str(e),
value,
)
raise e
async def async_scan_iter(self, pattern: str, count: int = 100) -> list:
@ -313,7 +312,6 @@ class RedisCache(BaseCache):
str(e),
value,
)
key = self.check_and_fix_namespace(key=key)
async with _redis_client as redis_client:
@ -352,7 +350,6 @@ class RedisCache(BaseCache):
str(e),
value,
)
async def async_set_cache_pipeline(self, cache_list, ttl=None):
"""
@ -413,7 +410,6 @@ class RedisCache(BaseCache):
str(e),
cache_value,
)
async def batch_cache_write(self, key, value, **kwargs):
print_verbose(
@ -458,7 +454,6 @@ class RedisCache(BaseCache):
str(e),
value,
)
raise e
async def flush_cache_buffer(self):
@ -495,8 +490,9 @@ class RedisCache(BaseCache):
return self._get_cache_logic(cached_response=cached_response)
except Exception as e:
# NON blocking - notify users Redis is throwing an exception
verbose_logger.error(
"LiteLLM Caching: get() - Got exception from REDIS: ", e
)
def batch_get_cache(self, key_list) -> dict:
"""
@ -646,10 +642,9 @@ class RedisCache(BaseCache):
error=e,
call_type="sync_ping",
)
verbose_logger.error(
f"LiteLLM Redis Cache PING: - Got exception from REDIS : {str(e)}"
)
raise e
async def ping(self) -> bool:
@ -683,10 +678,9 @@ class RedisCache(BaseCache):
call_type="async_ping", call_type="async_ping",
) )
) )
print_verbose( verbose_logger.error(
f"LiteLLM Redis Cache PING: - Got exception from REDIS : {str(e)}" f"LiteLLM Redis Cache PING: - Got exception from REDIS : {str(e)}"
) )
traceback.print_exc()
raise e raise e
async def delete_cache_keys(self, keys): async def delete_cache_keys(self, keys):
@ -1138,22 +1132,23 @@ class S3Cache(BaseCache):
cached_response = ast.literal_eval(cached_response)
if type(cached_response) is not dict:
cached_response = dict(cached_response)
verbose_logger.debug(
f"Got S3 Cache: key: {key}, cached_response {cached_response}. Type Response {type(cached_response)}"
)
return cached_response
except botocore.exceptions.ClientError as e:
if e.response["Error"]["Code"] == "NoSuchKey":
verbose_logger.error(
f"S3 Cache: The specified key '{key}' does not exist in the S3 bucket."
)
return None
except Exception as e:
# NON blocking - notify users S3 is throwing an exception
verbose_logger.error(
f"S3 Caching: get_cache() - Got exception from S3: {e}"
)
async def async_get_cache(self, key, **kwargs):
return self.get_cache(key=key, **kwargs)
@ -1234,8 +1229,7 @@ class DualCache(BaseCache):
return result
except Exception as e:
verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
raise e
def get_cache(self, key, local_only: bool = False, **kwargs):
@ -1262,7 +1256,7 @@ class DualCache(BaseCache):
print_verbose(f"get cache: cache result: {result}") print_verbose(f"get cache: cache result: {result}")
return result return result
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(traceback.format_exc())
def batch_get_cache(self, keys: list, local_only: bool = False, **kwargs): def batch_get_cache(self, keys: list, local_only: bool = False, **kwargs):
try: try:
@ -1295,7 +1289,7 @@ class DualCache(BaseCache):
print_verbose(f"async batch get cache: cache result: {result}") print_verbose(f"async batch get cache: cache result: {result}")
return result return result
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(traceback.format_exc())
async def async_get_cache(self, key, local_only: bool = False, **kwargs): async def async_get_cache(self, key, local_only: bool = False, **kwargs):
# Try to fetch from in-memory cache first # Try to fetch from in-memory cache first
@ -1328,7 +1322,7 @@ class DualCache(BaseCache):
print_verbose(f"get cache: cache result: {result}") print_verbose(f"get cache: cache result: {result}")
return result return result
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(traceback.format_exc())
async def async_batch_get_cache( async def async_batch_get_cache(
self, keys: list, local_only: bool = False, **kwargs self, keys: list, local_only: bool = False, **kwargs
@ -1368,7 +1362,7 @@ class DualCache(BaseCache):
return result return result
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(traceback.format_exc())
async def async_set_cache(self, key, value, local_only: bool = False, **kwargs): async def async_set_cache(self, key, value, local_only: bool = False, **kwargs):
print_verbose( print_verbose(
@ -1381,8 +1375,8 @@ class DualCache(BaseCache):
if self.redis_cache is not None and local_only == False: if self.redis_cache is not None and local_only == False:
await self.redis_cache.async_set_cache(key, value, **kwargs) await self.redis_cache.async_set_cache(key, value, **kwargs)
except Exception as e: except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}") verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
traceback.print_exc() verbose_logger.debug(traceback.format_exc())
async def async_batch_set_cache( async def async_batch_set_cache(
self, cache_list: list, local_only: bool = False, **kwargs self, cache_list: list, local_only: bool = False, **kwargs
@ -1404,8 +1398,8 @@ class DualCache(BaseCache):
cache_list=cache_list, ttl=kwargs.get("ttl", None) cache_list=cache_list, ttl=kwargs.get("ttl", None)
) )
except Exception as e: except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}") verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
traceback.print_exc() verbose_logger.debug(traceback.format_exc())
async def async_increment_cache( async def async_increment_cache(
self, key, value: float, local_only: bool = False, **kwargs self, key, value: float, local_only: bool = False, **kwargs
@ -1429,8 +1423,8 @@ class DualCache(BaseCache):
return result return result
except Exception as e: except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}") verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
traceback.print_exc() verbose_logger.debug(traceback.format_exc())
raise e raise e
def flush_cache(self): def flush_cache(self):
@ -1846,8 +1840,8 @@ class Cache:
) )
self.cache.set_cache(cache_key, cached_data, **kwargs) self.cache.set_cache(cache_key, cached_data, **kwargs)
except Exception as e: except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}") verbose_logger.error(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
traceback.print_exc() verbose_logger.debug(traceback.format_exc())
pass pass
async def async_add_cache(self, result, *args, **kwargs): async def async_add_cache(self, result, *args, **kwargs):
@ -1864,8 +1858,8 @@ class Cache:
) )
await self.cache.async_set_cache(cache_key, cached_data, **kwargs) await self.cache.async_set_cache(cache_key, cached_data, **kwargs)
except Exception as e: except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}") verbose_logger.error(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
traceback.print_exc() verbose_logger.debug(traceback.format_exc())
async def async_add_cache_pipeline(self, result, *args, **kwargs): async def async_add_cache_pipeline(self, result, *args, **kwargs):
""" """
@ -1897,8 +1891,8 @@ class Cache:
) )
await asyncio.gather(*tasks) await asyncio.gather(*tasks)
except Exception as e: except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}") verbose_logger.error(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
traceback.print_exc() verbose_logger.debug(traceback.format_exc())
async def batch_cache_write(self, result, *args, **kwargs): async def batch_cache_write(self, result, *args, **kwargs):
cache_key, cached_data, kwargs = self._add_cache_logic( cache_key, cached_data, kwargs = self._add_cache_logic(

View file

@ -638,6 +638,7 @@ LITELLM_EXCEPTION_TYPES = [
APIConnectionError, APIConnectionError,
APIResponseValidationError, APIResponseValidationError,
OpenAIError, OpenAIError,
InternalServerError,
] ]

View file

@ -169,6 +169,5 @@ class AISpendLogger:
print_verbose(f"AISpend Logging - final data object: {data}") print_verbose(f"AISpend Logging - final data object: {data}")
except: except:
# traceback.print_exc()
print_verbose(f"AISpend Logging Error - {traceback.format_exc()}") print_verbose(f"AISpend Logging Error - {traceback.format_exc()}")
pass pass

View file

@ -178,6 +178,5 @@ class BerriSpendLogger:
print_verbose(f"BerriSpend Logging - final data object: {data}") print_verbose(f"BerriSpend Logging - final data object: {data}")
response = requests.post(url, headers=headers, json=data) response = requests.post(url, headers=headers, json=data)
except: except:
# traceback.print_exc()
print_verbose(f"BerriSpend Logging Error - {traceback.format_exc()}") print_verbose(f"BerriSpend Logging Error - {traceback.format_exc()}")
pass pass

View file

@ -297,6 +297,5 @@ class ClickhouseLogger:
# make request to endpoint with payload # make request to endpoint with payload
verbose_logger.debug(f"Clickhouse Logger - final response = {response}") verbose_logger.debug(f"Clickhouse Logger - final response = {response}")
except Exception as e: except Exception as e:
traceback.print_exc()
verbose_logger.debug(f"Clickhouse - {str(e)}\n{traceback.format_exc()}") verbose_logger.debug(f"Clickhouse - {str(e)}\n{traceback.format_exc()}")
pass pass

View file

@ -115,7 +115,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
) )
print_verbose(f"Custom Logger - model call details: {kwargs}") print_verbose(f"Custom Logger - model call details: {kwargs}")
except: except:
traceback.print_exc()
print_verbose(f"Custom Logger Error - {traceback.format_exc()}") print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
async def async_log_input_event( async def async_log_input_event(
@ -130,7 +129,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
) )
print_verbose(f"Custom Logger - model call details: {kwargs}") print_verbose(f"Custom Logger - model call details: {kwargs}")
except: except:
traceback.print_exc()
print_verbose(f"Custom Logger Error - {traceback.format_exc()}") print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
def log_event( def log_event(
@ -146,7 +144,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
end_time, end_time,
) )
except: except:
# traceback.print_exc()
print_verbose(f"Custom Logger Error - {traceback.format_exc()}") print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
pass pass
@ -163,6 +160,5 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
end_time, end_time,
) )
except: except:
# traceback.print_exc()
print_verbose(f"Custom Logger Error - {traceback.format_exc()}") print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
pass pass

View file

@ -134,7 +134,6 @@ class DataDogLogger:
f"Datadog Layer Logging - final response object: {response_obj}" f"Datadog Layer Logging - final response object: {response_obj}"
) )
except Exception as e: except Exception as e:
traceback.print_exc()
verbose_logger.debug( verbose_logger.debug(
f"Datadog Layer Error - {str(e)}\n{traceback.format_exc()}" f"Datadog Layer Error - {str(e)}\n{traceback.format_exc()}"
) )

View file

@ -85,6 +85,5 @@ class DyanmoDBLogger:
) )
return response return response
except: except:
traceback.print_exc()
print_verbose(f"DynamoDB Layer Error - {traceback.format_exc()}") print_verbose(f"DynamoDB Layer Error - {traceback.format_exc()}")
pass pass

View file

@ -112,6 +112,5 @@ class HeliconeLogger:
) )
print_verbose(f"Helicone Logging - Error {response.text}") print_verbose(f"Helicone Logging - Error {response.text}")
except: except:
# traceback.print_exc()
print_verbose(f"Helicone Logging Error - {traceback.format_exc()}") print_verbose(f"Helicone Logging Error - {traceback.format_exc()}")
pass pass

View file

@ -69,6 +69,43 @@ class LangFuseLogger:
else: else:
self.upstream_langfuse = None self.upstream_langfuse = None
@staticmethod
def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict:
"""
Adds metadata from proxy request headers to Langfuse logging if keys start with "langfuse_"
and overwrites litellm_params.metadata if already included.
For example, if you want to append your trace to an existing `trace_id` via header, send
`headers: { ..., langfuse_existing_trace_id: your-existing-trace-id }` via proxy request.
"""
if litellm_params is None:
return metadata
if litellm_params.get("proxy_server_request") is None:
return metadata
if metadata is None:
metadata = {}
proxy_headers = (
litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
)
for metadata_param_key in proxy_headers:
if metadata_param_key.startswith("langfuse_"):
trace_param_key = metadata_param_key.replace("langfuse_", "", 1)
if trace_param_key in metadata:
verbose_logger.warning(
f"Overwriting Langfuse `{trace_param_key}` from request header"
)
else:
verbose_logger.debug(
f"Found Langfuse `{trace_param_key}` in request header"
)
metadata[trace_param_key] = proxy_headers.get(metadata_param_key)
return metadata
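A hedged usage sketch for the helper above: any request header prefixed with `langfuse_` is copied into the Langfuse metadata, so a caller can pin a generation to an existing trace. The proxy URL, API key, model, and trace id below are placeholders.

```python
# Illustrative client call through a LiteLLM proxy (placeholder values).
from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    # Picked up by add_metadata_from_header and forwarded to Langfuse
    # metadata as `existing_trace_id`.
    extra_headers={"langfuse_existing_trace_id": "your-existing-trace-id"},
)
```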
# def log_error(kwargs, response_obj, start_time, end_time): # def log_error(kwargs, response_obj, start_time, end_time):
# generation = trace.generation( # generation = trace.generation(
# level ="ERROR" # can be any of DEBUG, DEFAULT, WARNING or ERROR # level ="ERROR" # can be any of DEBUG, DEFAULT, WARNING or ERROR
@ -97,6 +134,7 @@ class LangFuseLogger:
metadata = ( metadata = (
litellm_params.get("metadata", {}) or {} litellm_params.get("metadata", {}) or {}
) # if litellm_params['metadata'] == None ) # if litellm_params['metadata'] == None
metadata = self.add_metadata_from_header(litellm_params, metadata)
optional_params = copy.deepcopy(kwargs.get("optional_params", {})) optional_params = copy.deepcopy(kwargs.get("optional_params", {}))
prompt = {"messages": kwargs.get("messages")} prompt = {"messages": kwargs.get("messages")}
@ -182,9 +220,11 @@ class LangFuseLogger:
verbose_logger.info(f"Langfuse Layer Logging - logging success") verbose_logger.info(f"Langfuse Layer Logging - logging success")
return {"trace_id": trace_id, "generation_id": generation_id} return {"trace_id": trace_id, "generation_id": generation_id}
except: except Exception as e:
traceback.print_exc() verbose_logger.error(
verbose_logger.debug(f"Langfuse Layer Error - {traceback.format_exc()}") "Langfuse Layer Error(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
return {"trace_id": None, "generation_id": None} return {"trace_id": None, "generation_id": None}
async def _async_log_event( async def _async_log_event(

View file

@ -44,7 +44,9 @@ class LangsmithLogger:
print_verbose( print_verbose(
f"Langsmith Logging - project_name: {project_name}, run_name {run_name}" f"Langsmith Logging - project_name: {project_name}, run_name {run_name}"
) )
langsmith_base_url = os.getenv("LANGSMITH_BASE_URL", "https://api.smith.langchain.com") langsmith_base_url = os.getenv(
"LANGSMITH_BASE_URL", "https://api.smith.langchain.com"
)
try: try:
print_verbose( print_verbose(
@ -89,9 +91,7 @@ class LangsmithLogger:
} }
url = f"{langsmith_base_url}/runs" url = f"{langsmith_base_url}/runs"
print_verbose( print_verbose(f"Langsmith Logging - About to send data to {url} ...")
f"Langsmith Logging - About to send data to {url} ..."
)
response = requests.post( response = requests.post(
url=url, url=url,
json=data, json=data,
@ -106,6 +106,5 @@ class LangsmithLogger:
f"Langsmith Layer Logging - final response object: {response_obj}" f"Langsmith Layer Logging - final response object: {response_obj}"
) )
except: except:
# traceback.print_exc()
print_verbose(f"Langsmith Layer Error - {traceback.format_exc()}") print_verbose(f"Langsmith Layer Error - {traceback.format_exc()}")
pass pass

View file

@ -171,7 +171,6 @@ class LogfireLogger:
f"Logfire Layer Logging - final response object: {response_obj}" f"Logfire Layer Logging - final response object: {response_obj}"
) )
except Exception as e: except Exception as e:
traceback.print_exc()
verbose_logger.debug( verbose_logger.debug(
f"Logfire Layer Error - {str(e)}\n{traceback.format_exc()}" f"Logfire Layer Error - {str(e)}\n{traceback.format_exc()}"
) )

View file

@ -14,6 +14,7 @@ def parse_usage(usage):
"prompt": usage["prompt_tokens"] if "prompt_tokens" in usage else 0, "prompt": usage["prompt_tokens"] if "prompt_tokens" in usage else 0,
} }
def parse_tool_calls(tool_calls): def parse_tool_calls(tool_calls):
if tool_calls is None: if tool_calls is None:
return None return None
@ -26,13 +27,13 @@ def parse_tool_calls(tool_calls):
"function": { "function": {
"name": tool_call.function.name, "name": tool_call.function.name,
"arguments": tool_call.function.arguments, "arguments": tool_call.function.arguments,
} },
} }
return serialized return serialized
return [clean_tool_call(tool_call) for tool_call in tool_calls] return [clean_tool_call(tool_call) for tool_call in tool_calls]
def parse_messages(input): def parse_messages(input):
@ -176,6 +177,5 @@ class LunaryLogger:
) )
except: except:
# traceback.print_exc()
print_verbose(f"Lunary Logging Error - {traceback.format_exc()}") print_verbose(f"Lunary Logging Error - {traceback.format_exc()}")
pass pass

View file

@ -109,8 +109,8 @@ class PrometheusLogger:
end_user_id, user_api_key, model, user_api_team, user_id end_user_id, user_api_key, model, user_api_team, user_id
).inc() ).inc()
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
verbose_logger.debug( "prometheus Layer Error(): Exception occured - {}".format(str(e))
f"prometheus Layer Error - {str(e)}\n{traceback.format_exc()}"
) )
verbose_logger.debug(traceback.format_exc())
pass pass

View file

@ -180,6 +180,5 @@ class S3Logger:
print_verbose(f"s3 Layer Logging - final response object: {response_obj}") print_verbose(f"s3 Layer Logging - final response object: {response_obj}")
return response return response
except Exception as e: except Exception as e:
traceback.print_exc()
verbose_logger.debug(f"s3 Layer Error - {str(e)}\n{traceback.format_exc()}") verbose_logger.debug(f"s3 Layer Error - {str(e)}\n{traceback.format_exc()}")
pass pass

View file

@ -110,6 +110,5 @@ class Supabase:
) )
except: except:
# traceback.print_exc()
print_verbose(f"Supabase Logging Error - {traceback.format_exc()}") print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")
pass pass

View file

@ -217,6 +217,5 @@ class WeightsBiasesLogger:
f"W&B Logging Logging - final response object: {response_obj}" f"W&B Logging Logging - final response object: {response_obj}"
) )
except: except:
# traceback.print_exc()
print_verbose(f"W&B Logging Layer Error - {traceback.format_exc()}") print_verbose(f"W&B Logging Layer Error - {traceback.format_exc()}")
pass pass

View file

@ -1,13 +1,14 @@
import os, types, traceback, copy, asyncio import types
import json import traceback
from enum import Enum import copy
import time import time
from typing import Callable, Optional from typing import Callable, Optional
from litellm.utils import ModelResponse, get_secret, Choices, Message, Usage from litellm.utils import ModelResponse, Choices, Message, Usage
import litellm import litellm
import sys, httpx import httpx
from .prompt_templates.factory import prompt_factory, custom_prompt, get_system_prompt from .prompt_templates.factory import prompt_factory, custom_prompt, get_system_prompt
from packaging.version import Version from packaging.version import Version
from litellm import verbose_logger
class GeminiError(Exception): class GeminiError(Exception):
@ -264,7 +265,8 @@ def completion(
choices_list.append(choice_obj) choices_list.append(choice_obj)
model_response["choices"] = choices_list model_response["choices"] = choices_list
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e)))
verbose_logger.debug(traceback.format_exc())
raise GeminiError( raise GeminiError(
message=traceback.format_exc(), status_code=response.status_code message=traceback.format_exc(), status_code=response.status_code
) )
@ -356,7 +358,8 @@ async def async_completion(
choices_list.append(choice_obj) choices_list.append(choice_obj)
model_response["choices"] = choices_list model_response["choices"] = choices_list
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e)))
verbose_logger.debug(traceback.format_exc())
raise GeminiError( raise GeminiError(
message=traceback.format_exc(), status_code=response.status_code message=traceback.format_exc(), status_code=response.status_code
) )

View file

@ -2,10 +2,12 @@ from itertools import chain
import requests, types, time # type: ignore import requests, types, time # type: ignore
import json, uuid import json, uuid
import traceback import traceback
from typing import Optional from typing import Optional, List
import litellm import litellm
from litellm.types.utils import ProviderField
import httpx, aiohttp, asyncio # type: ignore import httpx, aiohttp, asyncio # type: ignore
from .prompt_templates.factory import prompt_factory, custom_prompt from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm import verbose_logger
class OllamaError(Exception): class OllamaError(Exception):
@ -124,6 +126,19 @@ class OllamaConfig:
) )
and v is not None and v is not None
} }
def get_required_params(self) -> List[ProviderField]:
"""For a given provider, return it's required fields with a description"""
return [
ProviderField(
field_name="base_url",
field_type="string",
field_description="Your Ollama API Base",
field_value="http://10.10.11.249:11434",
)
]
def get_supported_openai_params( def get_supported_openai_params(
self, self,
): ):
@ -138,10 +153,12 @@ class OllamaConfig:
"response_format", "response_format",
] ]
# ollama wants plain base64 jpeg/png files as images. strip any leading dataURI # ollama wants plain base64 jpeg/png files as images. strip any leading dataURI
# and convert to jpeg if necessary. # and convert to jpeg if necessary.
def _convert_image(image): def _convert_image(image):
import base64, io import base64, io
try: try:
from PIL import Image from PIL import Image
except: except:
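The comment above describes the normalization Ollama expects. A rough standalone sketch of that idea follows; the helper name and flow are illustrative and are not the module's `_convert_image`.

```python
# Sketch, assuming Pillow is installed: strip any dataURI prefix and
# re-encode anything that is not already JPEG/PNG.
import base64
import io

def to_plain_base64(image: str) -> str:
    from PIL import Image

    if image.startswith("data:"):   # "data:image/png;base64,AAAA..."
        image = image.split(",", 1)[1]
    img = Image.open(io.BytesIO(base64.b64decode(image)))
    if img.format in ("JPEG", "PNG"):
        return image                # already in a format Ollama accepts
    buf = io.BytesIO()
    img.convert("RGB").save(buf, format="JPEG")
    return base64.b64encode(buf.getvalue()).decode("utf-8")
```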
@ -391,7 +408,13 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob
async for transformed_chunk in streamwrapper: async for transformed_chunk in streamwrapper:
yield transformed_chunk yield transformed_chunk
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"LiteLLM.ollama.py::ollama_async_streaming(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
raise e raise e
@ -455,7 +478,12 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
) )
return model_response return model_response
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"LiteLLM.ollama.py::ollama_acompletion(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
raise e raise e

View file

@ -1,11 +1,15 @@
from itertools import chain from itertools import chain
import requests, types, time import requests
import json, uuid import types
import time
import json
import uuid
import traceback import traceback
from typing import Optional from typing import Optional
from litellm import verbose_logger
import litellm import litellm
import httpx, aiohttp, asyncio import httpx
from .prompt_templates.factory import prompt_factory, custom_prompt import aiohttp
class OllamaError(Exception): class OllamaError(Exception):
@ -299,7 +303,10 @@ def get_ollama_response(
tool_calls=[ tool_calls=[
{ {
"id": f"call_{str(uuid.uuid4())}", "id": f"call_{str(uuid.uuid4())}",
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])}, "function": {
"name": function_call["name"],
"arguments": json.dumps(function_call["arguments"]),
},
"type": "function", "type": "function",
} }
], ],
@ -307,7 +314,9 @@ def get_ollama_response(
model_response["choices"][0]["message"] = message model_response["choices"][0]["message"] = message
model_response["choices"][0]["finish_reason"] = "tool_calls" model_response["choices"][0]["finish_reason"] = "tool_calls"
else: else:
model_response["choices"][0]["message"]["content"] = response_json["message"]["content"] model_response["choices"][0]["message"]["content"] = response_json["message"][
"content"
]
model_response["created"] = int(time.time()) model_response["created"] = int(time.time())
model_response["model"] = "ollama/" + model model_response["model"] = "ollama/" + model
prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=messages)) # type: ignore prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=messages)) # type: ignore
@ -361,7 +370,10 @@ def ollama_completion_stream(url, api_key, data, logging_obj):
tool_calls=[ tool_calls=[
{ {
"id": f"call_{str(uuid.uuid4())}", "id": f"call_{str(uuid.uuid4())}",
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])}, "function": {
"name": function_call["name"],
"arguments": json.dumps(function_call["arguments"]),
},
"type": "function", "type": "function",
} }
], ],
@ -410,9 +422,10 @@ async def ollama_async_streaming(
first_chunk_content = first_chunk.choices[0].delta.content or "" first_chunk_content = first_chunk.choices[0].delta.content or ""
response_content = first_chunk_content + "".join( response_content = first_chunk_content + "".join(
[ [
chunk.choices[0].delta.content chunk.choices[0].delta.content
async for chunk in streamwrapper async for chunk in streamwrapper
if chunk.choices[0].delta.content] if chunk.choices[0].delta.content
]
) )
function_call = json.loads(response_content) function_call = json.loads(response_content)
delta = litellm.utils.Delta( delta = litellm.utils.Delta(
@ -420,7 +433,10 @@ async def ollama_async_streaming(
tool_calls=[ tool_calls=[
{ {
"id": f"call_{str(uuid.uuid4())}", "id": f"call_{str(uuid.uuid4())}",
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])}, "function": {
"name": function_call["name"],
"arguments": json.dumps(function_call["arguments"]),
},
"type": "function", "type": "function",
} }
], ],
@ -433,7 +449,8 @@ async def ollama_async_streaming(
async for transformed_chunk in streamwrapper: async for transformed_chunk in streamwrapper:
yield transformed_chunk yield transformed_chunk
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error("LiteLLM.ollama_chat.py::ollama_async_streaming(): Exception occured - {}".format(str(e)))
verbose_logger.debug(traceback.format_exc())
async def ollama_acompletion( async def ollama_acompletion(
@ -483,7 +500,10 @@ async def ollama_acompletion(
tool_calls=[ tool_calls=[
{ {
"id": f"call_{str(uuid.uuid4())}", "id": f"call_{str(uuid.uuid4())}",
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])}, "function": {
"name": function_call["name"],
"arguments": json.dumps(function_call["arguments"]),
},
"type": "function", "type": "function",
} }
], ],
@ -491,7 +511,9 @@ async def ollama_acompletion(
model_response["choices"][0]["message"] = message model_response["choices"][0]["message"] = message
model_response["choices"][0]["finish_reason"] = "tool_calls" model_response["choices"][0]["finish_reason"] = "tool_calls"
else: else:
model_response["choices"][0]["message"]["content"] = response_json["message"]["content"] model_response["choices"][0]["message"]["content"] = response_json[
"message"
]["content"]
model_response["created"] = int(time.time()) model_response["created"] = int(time.time())
model_response["model"] = "ollama_chat/" + data["model"] model_response["model"] = "ollama_chat/" + data["model"]
@ -509,5 +531,9 @@ async def ollama_acompletion(
) )
return model_response return model_response
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"LiteLLM.ollama_acompletion(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
raise e raise e

View file

@ -1,11 +1,12 @@
import os, types, traceback, copy import types
import json import traceback
from enum import Enum import copy
import time import time
from typing import Callable, Optional from typing import Callable, Optional
from litellm.utils import ModelResponse, get_secret, Choices, Message, Usage from litellm.utils import ModelResponse, Choices, Message, Usage
import litellm import litellm
import sys, httpx import httpx
from litellm import verbose_logger
class PalmError(Exception): class PalmError(Exception):
@ -165,7 +166,10 @@ def completion(
choices_list.append(choice_obj) choices_list.append(choice_obj)
model_response["choices"] = choices_list model_response["choices"] = choices_list
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.llms.palm.py::completion(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
raise PalmError( raise PalmError(
message=traceback.format_exc(), status_code=response.status_code message=traceback.format_exc(), status_code=response.status_code
) )

View file

@ -826,7 +826,7 @@ def anthropic_messages_pt_xml(messages: list):
) # either string or none ) # either string or none
if messages[msg_i].get( if messages[msg_i].get(
"tool_calls", [] "tool_calls", []
): # support assistant tool invoke convertion ): # support assistant tool invoke conversion
assistant_text += convert_to_anthropic_tool_invoke_xml( # type: ignore assistant_text += convert_to_anthropic_tool_invoke_xml( # type: ignore
messages[msg_i]["tool_calls"] messages[msg_i]["tool_calls"]
) )
@ -1217,7 +1217,7 @@ def anthropic_messages_pt(messages: list):
if messages[msg_i].get( if messages[msg_i].get(
"tool_calls", [] "tool_calls", []
): # support assistant tool invoke convertion ): # support assistant tool invoke conversion
assistant_content.extend( assistant_content.extend(
convert_to_anthropic_tool_invoke(messages[msg_i]["tool_calls"]) convert_to_anthropic_tool_invoke(messages[msg_i]["tool_calls"])
) )

View file

@ -297,24 +297,29 @@ def _convert_gemini_role(role: str) -> Literal["user", "model"]:
def _process_gemini_image(image_url: str) -> PartType: def _process_gemini_image(image_url: str) -> PartType:
try: try:
if "gs://" in image_url: if ".mp4" in image_url and "gs://" in image_url:
# Case 1: Images with Cloud Storage URIs # Case 1: Videos with Cloud Storage URIs
part_mime = "video/mp4"
_file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
return PartType(file_data=_file_data)
elif ".pdf" in image_url and "gs://" in image_url:
# Case 2: PDFs with Cloud Storage URIs
part_mime = "application/pdf"
_file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
return PartType(file_data=_file_data)
elif "gs://" in image_url:
# Case 3: Images with Cloud Storage URIs
# The supported MIME types for images include image/png and image/jpeg. # The supported MIME types for images include image/png and image/jpeg.
part_mime = "image/png" if "png" in image_url else "image/jpeg" part_mime = "image/png" if "png" in image_url else "image/jpeg"
_file_data = FileDataType(mime_type=part_mime, file_uri=image_url) _file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
return PartType(file_data=_file_data) return PartType(file_data=_file_data)
elif "https:/" in image_url: elif "https:/" in image_url:
# Case 2: Images with direct links # Case 4: Images with direct links
image = _load_image_from_url(image_url) image = _load_image_from_url(image_url)
_blob = BlobType(data=image.data, mime_type=image._mime_type) _blob = BlobType(data=image.data, mime_type=image._mime_type)
return PartType(inline_data=_blob) return PartType(inline_data=_blob)
elif ".mp4" in image_url and "gs://" in image_url:
# Case 3: Videos with Cloud Storage URIs
part_mime = "video/mp4"
_file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
return PartType(file_data=_file_data)
elif "base64" in image_url: elif "base64" in image_url:
# Case 4: Images with base64 encoding # Case 5: Images with base64 encoding
import base64, re import base64, re
# base 64 is passed as data:image/jpeg;base64,<base-64-encoded-image> # base 64 is passed as data:image/jpeg;base64,<base-64-encoded-image>
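The reordering in this hunk matters: the specific `.mp4` and `.pdf` checks have to run before the generic `gs://` image branch, otherwise Cloud Storage videos and PDFs would be sent as `image/jpeg`. A small sketch of the resulting dispatch, for illustration only (not the real `_process_gemini_image`):

```python
def guess_part_mime(image_url: str) -> str:
    # Order matters: most specific Cloud Storage cases first.
    if "gs://" in image_url and ".mp4" in image_url:
        return "video/mp4"
    if "gs://" in image_url and ".pdf" in image_url:
        return "application/pdf"
    if "gs://" in image_url:
        return "image/png" if "png" in image_url else "image/jpeg"
    raise ValueError("https:// and base64 inputs are handled by other branches")
```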
@ -390,7 +395,7 @@ def _gemini_convert_messages_with_history(messages: list) -> List[ContentType]:
assistant_content.extend(_parts) assistant_content.extend(_parts)
elif messages[msg_i].get( elif messages[msg_i].get(
"tool_calls", [] "tool_calls", []
): # support assistant tool invoke convertion ): # support assistant tool invoke conversion
assistant_content.extend( assistant_content.extend(
convert_to_gemini_tool_call_invoke(messages[msg_i]["tool_calls"]) convert_to_gemini_tool_call_invoke(messages[msg_i]["tool_calls"])
) )
@ -642,9 +647,9 @@ def completion(
prompt = " ".join( prompt = " ".join(
[ [
message["content"] message.get("content")
for message in messages for message in messages
if isinstance(message["content"], str) if isinstance(message.get("content", None), str)
] ]
) )

View file

@ -365,7 +365,10 @@ async def acompletion(
) # sets the logging event loop if the user does sync streaming (e.g. on proxy for sagemaker calls) ) # sets the logging event loop if the user does sync streaming (e.g. on proxy for sagemaker calls)
return response return response
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.acompletion(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
custom_llm_provider = custom_llm_provider or "openai" custom_llm_provider = custom_llm_provider or "openai"
raise exception_type( raise exception_type(
model=model, model=model,
@ -478,7 +481,10 @@ def mock_completion(
except Exception as e: except Exception as e:
if isinstance(e, openai.APIError): if isinstance(e, openai.APIError):
raise e raise e
traceback.print_exc() verbose_logger.error(
"litellm.mock_completion(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
raise Exception("Mock completion response failed") raise Exception("Mock completion response failed")
@ -4449,7 +4455,10 @@ async def ahealth_check(
response = {} # args like remaining ratelimit etc. response = {} # args like remaining ratelimit etc.
return response return response
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.ahealth_check(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
stack_trace = traceback.format_exc() stack_trace = traceback.format_exc()
if isinstance(stack_trace, str): if isinstance(stack_trace, str):
stack_trace = stack_trace[:1000] stack_trace = stack_trace[:1000]

View file

@ -1,6 +1,7 @@
import json import json
import logging import logging
from logging import Formatter from logging import Formatter
import sys
class JsonFormatter(Formatter): class JsonFormatter(Formatter):

View file

@ -56,8 +56,10 @@ router_settings:
litellm_settings: litellm_settings:
success_callback: ["langfuse"] success_callback: ["langfuse"]
json_logs: true
general_settings: general_settings:
alerting: ["email"] alerting: ["email"]
key_management_system: "aws_kms"
key_management_settings:
hosted_keys: ["LITELLM_MASTER_KEY"]

View file

@ -76,6 +76,17 @@ class LitellmUserRoles(str, enum.Enum):
return ui_labels.get(self.value, "") return ui_labels.get(self.value, "")
class LitellmTableNames(str, enum.Enum):
"""
Enum for Table Names used by LiteLLM
"""
TEAM_TABLE_NAME: str = "LiteLLM_TeamTable"
USER_TABLE_NAME: str = "LiteLLM_UserTable"
KEY_TABLE_NAME: str = "LiteLLM_VerificationToken"
PROXY_MODEL_TABLE_NAME: str = "LiteLLM_ModelTable"
AlertType = Literal[ AlertType = Literal[
"llm_exceptions", "llm_exceptions",
"llm_too_slow", "llm_too_slow",
@ -935,6 +946,7 @@ class KeyManagementSystem(enum.Enum):
AZURE_KEY_VAULT = "azure_key_vault" AZURE_KEY_VAULT = "azure_key_vault"
AWS_SECRET_MANAGER = "aws_secret_manager" AWS_SECRET_MANAGER = "aws_secret_manager"
LOCAL = "local" LOCAL = "local"
AWS_KMS = "aws_kms"
class KeyManagementSettings(LiteLLMBase): class KeyManagementSettings(LiteLLMBase):
@ -1276,6 +1288,22 @@ class LiteLLM_ErrorLogs(LiteLLMBase):
endTime: Union[str, datetime, None] endTime: Union[str, datetime, None]
class LiteLLM_AuditLogs(LiteLLMBase):
id: str
updated_at: datetime
changed_by: str
action: Literal["created", "updated", "deleted"]
table_name: Literal[
LitellmTableNames.TEAM_TABLE_NAME,
LitellmTableNames.USER_TABLE_NAME,
LitellmTableNames.KEY_TABLE_NAME,
LitellmTableNames.PROXY_MODEL_TABLE_NAME,
]
object_id: str
before_value: Optional[Json] = None
updated_values: Optional[Json] = None
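For readers skimming the new model: a standalone pydantic sketch of what one audit record carries, with field names mirroring `LiteLLM_AuditLogs` above and placeholder values (the real class lives in `litellm.proxy._types`).

```python
from datetime import datetime, timezone
from typing import Optional

from pydantic import BaseModel

class AuditLogSketch(BaseModel):
    id: str
    updated_at: datetime
    changed_by: str                      # key / user that made the change
    action: str                          # "created" | "updated" | "deleted"
    table_name: str                      # one of the LitellmTableNames values
    object_id: str
    before_value: Optional[dict] = None
    updated_values: Optional[dict] = None

record = AuditLogSketch(
    id="audit-1",
    updated_at=datetime.now(timezone.utc),
    changed_by="admin",
    action="updated",
    table_name="LiteLLM_TeamTable",
    object_id="team-42",
    updated_values={"max_budget": 50.0},
)
```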
class LiteLLM_SpendLogs_ResponseObject(LiteLLMBase): class LiteLLM_SpendLogs_ResponseObject(LiteLLMBase):
response: Optional[List[Union[LiteLLM_SpendLogs, Any]]] = None response: Optional[List[Union[LiteLLM_SpendLogs, Any]]] = None

View file

@ -88,7 +88,7 @@ class _PROXY_AzureContentSafety(
verbose_proxy_logger.debug( verbose_proxy_logger.debug(
"Error in Azure Content-Safety: %s", traceback.format_exc() "Error in Azure Content-Safety: %s", traceback.format_exc()
) )
traceback.print_exc() verbose_proxy_logger.debug(traceback.format_exc())
raise raise
result = self._compute_result(response) result = self._compute_result(response)
@ -123,7 +123,12 @@ class _PROXY_AzureContentSafety(
except HTTPException as e: except HTTPException as e:
raise e raise e
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.hooks.azure_content_safety.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
async def async_post_call_success_hook( async def async_post_call_success_hook(
self, self,

View file

@ -94,7 +94,12 @@ class _PROXY_BatchRedisRequests(CustomLogger):
except HTTPException as e: except HTTPException as e:
raise e raise e
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.hooks.batch_redis_get.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
async def async_get_cache(self, *args, **kwargs): async def async_get_cache(self, *args, **kwargs):
""" """

View file

@ -1,13 +1,13 @@
# What this does? # What this does?
## Checks if key is allowed to use the cache controls passed in to the completion() call ## Checks if key is allowed to use the cache controls passed in to the completion() call
from typing import Optional
import litellm import litellm
from litellm import verbose_logger
from litellm.caching import DualCache from litellm.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
import json, traceback import traceback
class _PROXY_CacheControlCheck(CustomLogger): class _PROXY_CacheControlCheck(CustomLogger):
@ -54,4 +54,9 @@ class _PROXY_CacheControlCheck(CustomLogger):
except HTTPException as e: except HTTPException as e:
raise e raise e
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.proxy.hooks.cache_control_check.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())

View file

@ -1,10 +1,10 @@
from typing import Optional from litellm import verbose_logger
import litellm import litellm
from litellm.caching import DualCache from litellm.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
import json, traceback import traceback
class _PROXY_MaxBudgetLimiter(CustomLogger): class _PROXY_MaxBudgetLimiter(CustomLogger):
@ -44,4 +44,9 @@ class _PROXY_MaxBudgetLimiter(CustomLogger):
except HTTPException as e: except HTTPException as e:
raise e raise e
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.proxy.hooks.max_budget_limiter.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())

View file

@ -8,8 +8,8 @@
# Tell us how we can improve! - Krrish & Ishaan # Tell us how we can improve! - Krrish & Ishaan
from typing import Optional, Literal, Union from typing import Optional, Union
import litellm, traceback, sys, uuid, json import litellm, traceback, uuid, json # noqa: E401
from litellm.caching import DualCache from litellm.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
@ -21,8 +21,8 @@ from litellm.utils import (
ImageResponse, ImageResponse,
StreamingChoices, StreamingChoices,
) )
from datetime import datetime import aiohttp
import aiohttp, asyncio import asyncio
class _OPTIONAL_PresidioPIIMasking(CustomLogger): class _OPTIONAL_PresidioPIIMasking(CustomLogger):
@ -138,7 +138,12 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
else: else:
raise Exception(f"Invalid anonymizer response: {redacted_text}") raise Exception(f"Invalid anonymizer response: {redacted_text}")
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.hooks.presidio_pii_masking.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
raise e raise e
async def async_pre_call_hook( async def async_pre_call_hook(

View file

@ -204,7 +204,12 @@ class _OPTIONAL_PromptInjectionDetection(CustomLogger):
return e.detail["error"] return e.detail["error"]
raise e raise e
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
async def async_moderation_hook( async def async_moderation_hook(
self, self,

View file

@ -23,4 +23,5 @@ general_settings:
master_key: sk-1234 master_key: sk-1234
litellm_settings: litellm_settings:
callbacks: ["otel"] callbacks: ["otel"]
store_audit_logs: true

View file

@ -103,6 +103,7 @@ from litellm.proxy.utils import (
update_spend, update_spend,
encrypt_value, encrypt_value,
decrypt_value, decrypt_value,
get_error_message_str,
) )
from litellm import ( from litellm import (
CreateBatchRequest, CreateBatchRequest,
@ -112,7 +113,10 @@ from litellm import (
CreateFileRequest, CreateFileRequest,
) )
from litellm.proxy.secret_managers.google_kms import load_google_kms from litellm.proxy.secret_managers.google_kms import load_google_kms
from litellm.proxy.secret_managers.aws_secret_manager import load_aws_secret_manager from litellm.proxy.secret_managers.aws_secret_manager import (
load_aws_secret_manager,
load_aws_kms,
)
import pydantic import pydantic
from litellm.proxy._types import * from litellm.proxy._types import *
from litellm.caching import DualCache, RedisCache from litellm.caching import DualCache, RedisCache
@ -125,7 +129,10 @@ from litellm.router import (
AssistantsTypedDict, AssistantsTypedDict,
) )
from litellm.router import ModelInfo as RouterModelInfo from litellm.router import ModelInfo as RouterModelInfo
from litellm._logging import verbose_router_logger, verbose_proxy_logger from litellm._logging import (
verbose_router_logger,
verbose_proxy_logger,
)
from litellm.proxy.auth.handle_jwt import JWTHandler from litellm.proxy.auth.handle_jwt import JWTHandler
from litellm.proxy.auth.litellm_license import LicenseCheck from litellm.proxy.auth.litellm_license import LicenseCheck
from litellm.proxy.auth.model_checks import ( from litellm.proxy.auth.model_checks import (
@ -1471,7 +1478,12 @@ async def user_api_key_auth(
else: else:
raise Exception() raise Exception()
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.user_api_key_auth(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, litellm.BudgetExceededError): if isinstance(e, litellm.BudgetExceededError):
raise ProxyException( raise ProxyException(
message=e.message, type="auth_error", param=None, code=400 message=e.message, type="auth_error", param=None, code=400
@ -2736,10 +2748,12 @@ class ProxyConfig:
load_google_kms(use_google_kms=True) load_google_kms(use_google_kms=True)
elif ( elif (
key_management_system key_management_system
== KeyManagementSystem.AWS_SECRET_MANAGER.value == KeyManagementSystem.AWS_SECRET_MANAGER.value # noqa: F405
): ):
### LOAD FROM AWS SECRET MANAGER ### ### LOAD FROM AWS SECRET MANAGER ###
load_aws_secret_manager(use_aws_secret_manager=True) load_aws_secret_manager(use_aws_secret_manager=True)
elif key_management_system == KeyManagementSystem.AWS_KMS.value:
load_aws_kms(use_aws_kms=True)
else: else:
raise ValueError("Invalid Key Management System selected") raise ValueError("Invalid Key Management System selected")
key_management_settings = general_settings.get( key_management_settings = general_settings.get(
@ -2773,6 +2787,7 @@ class ProxyConfig:
master_key = general_settings.get( master_key = general_settings.get(
"master_key", litellm.get_secret("LITELLM_MASTER_KEY", None) "master_key", litellm.get_secret("LITELLM_MASTER_KEY", None)
) )
if master_key and master_key.startswith("os.environ/"): if master_key and master_key.startswith("os.environ/"):
master_key = litellm.get_secret(master_key) master_key = litellm.get_secret(master_key)
if not isinstance(master_key, str): if not isinstance(master_key, str):
@ -3476,7 +3491,12 @@ async def generate_key_helper_fn(
) )
key_data["token_id"] = getattr(create_key_response, "token", None) key_data["token_id"] = getattr(create_key_response, "token", None)
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.generate_key_helper_fn(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise e raise e
raise HTTPException( raise HTTPException(
@ -3515,7 +3535,12 @@ async def delete_verification_token(tokens: List, user_id: Optional[str] = None)
else: else:
raise Exception("DB not connected. prisma_client is None") raise Exception("DB not connected. prisma_client is None")
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.delete_verification_token(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
raise e raise e
return deleted_tokens return deleted_tokens
@ -3676,7 +3701,12 @@ async def async_assistants_data_generator(
done_message = "[DONE]" done_message = "[DONE]"
yield f"data: {done_message}\n\n" yield f"data: {done_message}\n\n"
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.async_assistants_data_generator(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, user_api_key_dict=user_api_key_dict,
original_exception=e, original_exception=e,
@ -3686,9 +3716,6 @@ async def async_assistants_data_generator(
f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`" f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`"
) )
router_model_names = llm_router.model_names if llm_router is not None else [] router_model_names = llm_router.model_names if llm_router is not None else []
if user_debug:
traceback.print_exc()
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise e raise e
else: else:
@ -3728,7 +3755,12 @@ async def async_data_generator(
done_message = "[DONE]" done_message = "[DONE]"
yield f"data: {done_message}\n\n" yield f"data: {done_message}\n\n"
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.async_data_generator(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, user_api_key_dict=user_api_key_dict,
original_exception=e, original_exception=e,
@ -3738,8 +3770,6 @@ async def async_data_generator(
f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`" f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`"
) )
router_model_names = llm_router.model_names if llm_router is not None else [] router_model_names = llm_router.model_names if llm_router is not None else []
if user_debug:
traceback.print_exc()
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise e raise e
@ -3800,6 +3830,18 @@ def on_backoff(details):
verbose_proxy_logger.debug("Backing off... this was attempt # %s", details["tries"]) verbose_proxy_logger.debug("Backing off... this was attempt # %s", details["tries"])
def giveup(e):
result = not (
isinstance(e, ProxyException)
and getattr(e, "message", None) is not None
and isinstance(e.message, str)
and "Max parallel request limit reached" in e.message
)
if result:
verbose_proxy_logger.info(json.dumps({"event": "giveup", "exception": str(e)}))
return result
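The `giveup` predicate above feeds the `backoff` decorator used on the chat completion route: returning True stops further retries, so only "Max parallel request limit reached" errors are retried. A minimal sketch of the same wiring with the third-party `backoff` package (the decorated function is a placeholder, not a proxy route):

```python
import backoff

def _giveup(e: Exception) -> bool:
    # True means "stop retrying"; only parallel-request-limit errors retry.
    return "Max parallel request limit reached" not in str(e)

@backoff.on_exception(backoff.expo, Exception, max_tries=3, giveup=_giveup)
def flaky_call():
    raise RuntimeError("Max parallel request limit reached")

# Calling flaky_call() would retry up to 3 times, then re-raise.
```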
@router.on_event("startup") @router.on_event("startup")
async def startup_event(): async def startup_event():
global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time, litellm_proxy_admin_name, db_writer_client, store_model_in_db global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time, litellm_proxy_admin_name, db_writer_client, store_model_in_db
@ -4084,12 +4126,8 @@ def model_list(
max_tries=litellm.num_retries or 3, # maximum number of retries max_tries=litellm.num_retries or 3, # maximum number of retries
max_time=litellm.request_timeout or 60, # maximum total time to retry for max_time=litellm.request_timeout or 60, # maximum total time to retry for
on_backoff=on_backoff, # specifying the function to call on backoff on_backoff=on_backoff, # specifying the function to call on backoff
giveup=lambda e: not ( giveup=giveup,
isinstance(e, ProxyException) logger=verbose_proxy_logger,
and getattr(e, "message", None) is not None
and isinstance(e.message, str)
and "Max parallel request limit reached" in e.message
), # the result of the logical expression is on the second position
) )
async def chat_completion( async def chat_completion(
request: Request, request: Request,
@ -4098,6 +4136,7 @@ async def chat_completion(
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
): ):
global general_settings, user_debug, proxy_logging_obj, llm_model_list global general_settings, user_debug, proxy_logging_obj, llm_model_list
data = {} data = {}
try: try:
body = await request.body() body = await request.body()
@ -4386,7 +4425,12 @@ async def chat_completion(
return _chat_response return _chat_response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting data["litellm_status"] = "fail" # used for alerting
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.chat_completion(): Exception occured - {}".format(
get_error_message_str(e=e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
@ -4397,8 +4441,6 @@ async def chat_completion(
litellm_debug_info, litellm_debug_info,
) )
router_model_names = llm_router.model_names if llm_router is not None else [] router_model_names = llm_router.model_names if llm_router is not None else []
if user_debug:
traceback.print_exc()
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
@ -4630,15 +4672,12 @@ async def completion(
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
verbose_proxy_logger.debug("EXCEPTION RAISED IN PROXY MAIN.PY") verbose_proxy_logger.error(
litellm_debug_info = getattr(e, "litellm_debug_info", "") "litellm.proxy.proxy_server.completion(): Exception occured - {}".format(
verbose_proxy_logger.debug( str(e)
"\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`", )
e,
litellm_debug_info,
) )
traceback.print_exc() verbose_proxy_logger.debug(traceback.format_exc())
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}" error_msg = f"{str(e)}"
raise ProxyException( raise ProxyException(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
@ -4848,7 +4887,12 @@ async def embeddings(
e, e,
litellm_debug_info, litellm_debug_info,
) )
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.embeddings(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "message", str(e)), message=getattr(e, "message", str(e)),
@ -5027,7 +5071,12 @@ async def image_generation(
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.image_generation(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "message", str(e)), message=getattr(e, "message", str(e)),
@ -5205,7 +5254,12 @@ async def audio_speech(
) )
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.audio_speech(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
raise e raise e
@ -5394,7 +5448,12 @@ async def audio_transcriptions(
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.audio_transcription(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "message", str(e.detail)), message=getattr(e, "message", str(e.detail)),
@ -5403,7 +5462,6 @@ async def audio_transcriptions(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
) )
else: else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}" error_msg = f"{str(e)}"
raise ProxyException( raise ProxyException(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
@ -5531,7 +5589,12 @@ async def get_assistants(
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.get_assistants(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "message", str(e.detail)), message=getattr(e, "message", str(e.detail)),
@ -5540,7 +5603,6 @@ async def get_assistants(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
) )
else: else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}" error_msg = f"{str(e)}"
raise ProxyException( raise ProxyException(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
@ -5660,7 +5722,12 @@ async def create_threads(
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.create_threads(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "message", str(e.detail)), message=getattr(e, "message", str(e.detail)),
@ -5669,7 +5736,6 @@ async def create_threads(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
) )
else: else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}" error_msg = f"{str(e)}"
raise ProxyException( raise ProxyException(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
@ -5788,7 +5854,12 @@ async def get_thread(
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.get_thread(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "message", str(e.detail)), message=getattr(e, "message", str(e.detail)),
@ -5797,7 +5868,6 @@ async def get_thread(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
) )
else: else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}" error_msg = f"{str(e)}"
raise ProxyException( raise ProxyException(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
@ -5919,7 +5989,12 @@ async def add_messages(
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.add_messages(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "message", str(e.detail)), message=getattr(e, "message", str(e.detail)),
@ -5928,7 +6003,6 @@ async def add_messages(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
) )
else: else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}" error_msg = f"{str(e)}"
raise ProxyException( raise ProxyException(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
@ -6046,7 +6120,12 @@ async def get_messages(
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.get_messages(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "message", str(e.detail)), message=getattr(e, "message", str(e.detail)),
@ -6055,7 +6134,6 @@ async def get_messages(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
) )
else: else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}" error_msg = f"{str(e)}"
raise ProxyException( raise ProxyException(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
@ -6187,7 +6265,12 @@ async def run_thread(
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.run_thread(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "message", str(e.detail)), message=getattr(e, "message", str(e.detail)),
@ -6196,7 +6279,6 @@ async def run_thread(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
) )
else: else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}" error_msg = f"{str(e)}"
raise ProxyException( raise ProxyException(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
@ -6335,7 +6417,12 @@ async def create_batch(
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.create_batch(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "message", str(e.detail)), message=getattr(e, "message", str(e.detail)),
@ -6344,7 +6431,6 @@ async def create_batch(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
) )
else: else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}" error_msg = f"{str(e)}"
raise ProxyException( raise ProxyException(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
@ -6478,7 +6564,12 @@ async def retrieve_batch(
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.retrieve_batch(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "message", str(e.detail)), message=getattr(e, "message", str(e.detail)),
@ -6631,7 +6722,12 @@ async def create_file(
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.create_file(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "message", str(e.detail)), message=getattr(e, "message", str(e.detail)),
@ -6640,7 +6736,6 @@ async def create_file(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
) )
else: else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}" error_msg = f"{str(e)}"
raise ProxyException( raise ProxyException(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
@ -6816,7 +6911,12 @@ async def moderations(
await proxy_logging_obj.post_call_failure_hook( await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
) )
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.moderations(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "message", str(e)), message=getattr(e, "message", str(e)),
@ -6825,7 +6925,6 @@ async def moderations(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
) )
else: else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}" error_msg = f"{str(e)}"
raise ProxyException( raise ProxyException(
message=getattr(e, "message", error_msg), message=getattr(e, "message", error_msg),
@ -7115,9 +7214,33 @@ async def generate_key_fn(
) )
) )
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_updated_values = json.dumps(response)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=user_api_key_dict.user_id
or litellm_proxy_admin_name,
table_name=LitellmTableNames.KEY_TABLE_NAME,
object_id=response.get("token_id", ""),
action="created",
updated_values=_updated_values,
before_value=None,
)
)
)
return GenerateKeyResponse(**response) return GenerateKeyResponse(**response)
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.generate_key_fn(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -7138,7 +7261,11 @@ async def generate_key_fn(
@router.post( @router.post(
"/key/update", tags=["key management"], dependencies=[Depends(user_api_key_auth)] "/key/update", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
) )
async def update_key_fn(request: Request, data: UpdateKeyRequest): async def update_key_fn(
request: Request,
data: UpdateKeyRequest,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
""" """
Update an existing key Update an existing key
""" """
@ -7150,6 +7277,16 @@ async def update_key_fn(request: Request, data: UpdateKeyRequest):
if prisma_client is None: if prisma_client is None:
raise Exception("Not connected to DB!") raise Exception("Not connected to DB!")
existing_key_row = await prisma_client.get_data(
token=data.key, table_name="key", query_type="find_unique"
)
if existing_key_row is None:
raise HTTPException(
status_code=404,
detail={"error": f"Team not found, passed team_id={data.team_id}"},
)
# get non default values for key # get non default values for key
non_default_values = {} non_default_values = {}
for k, v in data_json.items(): for k, v in data_json.items():
@ -7176,6 +7313,29 @@ async def update_key_fn(request: Request, data: UpdateKeyRequest):
hashed_token = hash_token(key) hashed_token = hash_token(key)
user_api_key_cache.delete_cache(hashed_token) user_api_key_cache.delete_cache(hashed_token)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_updated_values = json.dumps(data_json)
_before_value = existing_key_row.json(exclude_none=True)
_before_value = json.dumps(_before_value)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=user_api_key_dict.user_id
or litellm_proxy_admin_name,
table_name=LitellmTableNames.KEY_TABLE_NAME,
object_id=data.key,
action="updated",
updated_values=_updated_values,
before_value=_before_value,
)
)
)
return {"key": key, **response["data"]} return {"key": key, **response["data"]}
# update based on remaining passed in values # update based on remaining passed in values
except Exception as e: except Exception as e:
@ -7238,6 +7398,34 @@ async def delete_key_fn(
): ):
user_id = None # unless they're admin user_id = None # unless they're admin
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
# we do this after the first for loop, since first for loop is for validation. we only want this inserted after validation passes
if litellm.store_audit_logs is True:
# make an audit log for each key deleted
for key in data.keys:
key_row = await prisma_client.get_data( # type: ignore
token=key, table_name="key", query_type="find_unique"
)
key_row = key_row.json(exclude_none=True)
_key_row = json.dumps(key_row)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=user_api_key_dict.user_id
or litellm_proxy_admin_name,
table_name=LitellmTableNames.KEY_TABLE_NAME,
object_id=key,
action="deleted",
updated_values="{}",
before_value=_key_row,
)
)
)
number_deleted_keys = await delete_verification_token( number_deleted_keys = await delete_verification_token(
tokens=keys, user_id=user_id tokens=keys, user_id=user_id
) )
@ -9507,7 +9695,12 @@ async def user_info(
} }
return response_data return response_data
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.user_info(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -9602,7 +9795,12 @@ async def user_update(data: UpdateUserRequest):
return response return response
# update based on remaining passed in values # update based on remaining passed in values
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.user_update(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -9655,7 +9853,12 @@ async def user_request_model(request: Request):
return {"status": "success"} return {"status": "success"}
# update based on remaining passed in values # update based on remaining passed in values
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.user_request_model(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -9697,7 +9900,12 @@ async def user_get_requests():
return {"requests": response} return {"requests": response}
# update based on remaining passed in values # update based on remaining passed in values
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.user_get_requests(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -10087,7 +10295,12 @@ async def update_end_user(
# update based on remaining passed in values # update based on remaining passed in values
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.update_end_user(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Internal Server Error({str(e)})"), message=getattr(e, "detail", f"Internal Server Error({str(e)})"),
@ -10171,7 +10384,12 @@ async def delete_end_user(
# update based on remaining passed in values # update based on remaining passed in values
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.delete_end_user(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Internal Server Error({str(e)})"), message=getattr(e, "detail", f"Internal Server Error({str(e)})"),
@ -10365,12 +10583,65 @@ async def new_team(
} }
}, },
) )
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_updated_values = complete_team_data.json(exclude_none=True)
_updated_values = json.dumps(_updated_values)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=user_api_key_dict.user_id or litellm_proxy_admin_name,
table_name=LitellmTableNames.TEAM_TABLE_NAME,
object_id=data.team_id,
action="created",
updated_values=_updated_values,
before_value=None,
)
)
)
try: try:
return team_row.model_dump() return team_row.model_dump()
except Exception as e: except Exception as e:
return team_row.dict() return team_row.dict()
async def create_audit_log_for_update(request_data: LiteLLM_AuditLogs):
if premium_user is not True:
return
if litellm.store_audit_logs is not True:
return
if prisma_client is None:
raise Exception("prisma_client is None, no DB connected")
verbose_proxy_logger.debug("creating audit log for %s", request_data)
if isinstance(request_data.updated_values, dict):
request_data.updated_values = json.dumps(request_data.updated_values)
if isinstance(request_data.before_value, dict):
request_data.before_value = json.dumps(request_data.before_value)
_request_data = request_data.dict(exclude_none=True)
try:
await prisma_client.db.litellm_auditlog.create(
data={
**_request_data, # type: ignore
}
)
except Exception as e:
# [Non-Blocking Exception. Do not allow blocking LLM API call]
verbose_proxy_logger.error(f"Failed Creating audit log {e}")
return
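
The helper above is the single entry point for the new enterprise audit trail. A minimal sketch of how a caller uses it, assuming `LiteLLM_AuditLogs` and `LitellmTableNames` are importable from `litellm.proxy._types` and that the code runs inside an async handler where `create_audit_log_for_update` (defined above) is in scope:

```python
# Hedged sketch of the audit-logging pattern introduced in this diff; not the
# authoritative implementation. The import path for the proxy types is assumed.
import asyncio
import json
import uuid
from datetime import datetime, timezone

import litellm
from litellm.proxy._types import LiteLLM_AuditLogs, LitellmTableNames

litellm.store_audit_logs = True  # enterprise flag gating audit writes


async def record_key_update(changed_by: str, key_hash: str, before: dict, after: dict) -> None:
    if litellm.store_audit_logs is not True:
        return
    # fire-and-forget: the audit write never blocks the API response
    asyncio.create_task(
        create_audit_log_for_update(  # helper defined directly above
            request_data=LiteLLM_AuditLogs(
                id=str(uuid.uuid4()),
                updated_at=datetime.now(timezone.utc),
                changed_by=changed_by,
                table_name=LitellmTableNames.KEY_TABLE_NAME,
                object_id=key_hash,
                action="updated",
                updated_values=json.dumps(after),
                before_value=json.dumps(before),
            )
        )
    )
```
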
@router.post( @router.post(
"/team/update", tags=["team management"], dependencies=[Depends(user_api_key_auth)] "/team/update", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
) )
@ -10443,6 +10714,27 @@ async def update_team(
team_id=data.team_id, team_id=data.team_id,
) )
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_before_value = existing_team_row.json(exclude_none=True)
_before_value = json.dumps(_before_value)
_after_value: str = json.dumps(updated_kv)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=user_api_key_dict.user_id or litellm_proxy_admin_name,
table_name=LitellmTableNames.TEAM_TABLE_NAME,
object_id=data.team_id,
action="updated",
updated_values=_after_value,
before_value=_before_value,
)
)
)
return team_row return team_row
@ -10714,6 +11006,35 @@ async def delete_team(
detail={"error": f"Team not found, passed team_id={team_id}"}, detail={"error": f"Team not found, passed team_id={team_id}"},
) )
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
# we do this after the first for loop, since first for loop is for validation. we only want this inserted after validation passes
if litellm.store_audit_logs is True:
# make an audit log for each team deleted
for team_id in data.team_ids:
team_row = await prisma_client.get_data( # type: ignore
team_id=team_id, table_name="team", query_type="find_unique"
)
_team_row = team_row.json(exclude_none=True)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=user_api_key_dict.user_id
or litellm_proxy_admin_name,
table_name=LitellmTableNames.TEAM_TABLE_NAME,
object_id=team_id,
action="deleted",
updated_values="{}",
before_value=_team_row,
)
)
)
# End of Audit logging
## DELETE ASSOCIATED KEYS ## DELETE ASSOCIATED KEYS
await prisma_client.delete_data(team_id_list=data.team_ids, table_name="key") await prisma_client.delete_data(team_id_list=data.team_ids, table_name="key")
## DELETE TEAMS ## DELETE TEAMS
@ -11371,7 +11692,12 @@ async def add_new_model(
return model_response return model_response
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.add_new_model(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -11485,7 +11811,12 @@ async def update_model(
return model_response return model_response
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.update_model(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -13719,7 +14050,12 @@ async def update_config(config_info: ConfigYAML):
return {"message": "Config updated successfully"} return {"message": "Config updated successfully"}
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.update_config(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -14192,7 +14528,12 @@ async def get_config():
"available_callbacks": all_available_callbacks, "available_callbacks": all_available_callbacks,
} }
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.get_config(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -14443,7 +14784,12 @@ async def health_services_endpoint(
} }
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.health_services_endpoint(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
raise ProxyException( raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"), message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@ -14522,7 +14868,12 @@ async def health_endpoint(
"unhealthy_count": len(unhealthy_endpoints), "unhealthy_count": len(unhealthy_endpoints),
} }
except Exception as e: except Exception as e:
traceback.print_exc() verbose_proxy_logger.error(
"litellm.proxy.proxy_server.py::health_endpoint(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
raise e raise e


@ -243,4 +243,16 @@ model LiteLLM_InvitationLink {
liteLLM_user_table_user LiteLLM_UserTable @relation("UserId", fields: [user_id], references: [user_id]) liteLLM_user_table_user LiteLLM_UserTable @relation("UserId", fields: [user_id], references: [user_id])
liteLLM_user_table_created LiteLLM_UserTable @relation("CreatedBy", fields: [created_by], references: [user_id]) liteLLM_user_table_created LiteLLM_UserTable @relation("CreatedBy", fields: [created_by], references: [user_id])
liteLLM_user_table_updated LiteLLM_UserTable @relation("UpdatedBy", fields: [updated_by], references: [user_id]) liteLLM_user_table_updated LiteLLM_UserTable @relation("UpdatedBy", fields: [updated_by], references: [user_id])
}
model LiteLLM_AuditLog {
id String @id @default(uuid())
updated_at DateTime @default(now())
changed_by String // user or system that performed the action
action String // create, update, delete
table_name String // one of LitellmTableNames.TEAM_TABLE_NAME, LitellmTableNames.USER_TABLE_NAME, LitellmTableNames.PROXY_MODEL_TABLE_NAME,
object_id String // id of the object being audited. This can be the key id, team id, user id, model id
before_value Json? // value of the row before the change
updated_values Json? // value of the row after the change
} }
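
For reference, a hedged sketch of reading the new LiteLLM_AuditLog rows back through the generated Prisma Python client; the `litellm_auditlog` accessor mirrors the `prisma_client.db.litellm_auditlog.create(...)` call used elsewhere in this diff, while the connection setup and the exact `table_name` string are assumptions.

```python
# Hedged sketch: list recent audit entries for one team (assumes prisma-client-py
# generated from the schema above).
import asyncio

from prisma import Prisma


async def recent_team_audit_entries(team_id: str, limit: int = 20):
    db = Prisma()
    await db.connect()
    try:
        return await db.litellm_auditlog.find_many(
            # "LiteLLM_TeamTable" stands in for LitellmTableNames.TEAM_TABLE_NAME;
            # the exact stored string is an assumption.
            where={"table_name": "LiteLLM_TeamTable", "object_id": team_id},
            order={"updated_at": "desc"},
            take=limit,
        )
    finally:
        await db.disconnect()


# asyncio.run(recent_team_audit_entries("my-team-id"))
```
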


@ -8,7 +8,8 @@ Requires:
* `pip install boto3>=1.28.57` * `pip install boto3>=1.28.57`
""" """
import litellm, os import litellm
import os
from typing import Optional from typing import Optional
from litellm.proxy._types import KeyManagementSystem from litellm.proxy._types import KeyManagementSystem
@ -38,3 +39,21 @@ def load_aws_secret_manager(use_aws_secret_manager: Optional[bool]):
except Exception as e: except Exception as e:
raise e raise e
def load_aws_kms(use_aws_kms: Optional[bool]):
if use_aws_kms is None or use_aws_kms is False:
return
try:
import boto3
validate_environment()
# Create a KMS client
kms_client = boto3.client("kms", region_name=os.getenv("AWS_REGION_NAME"))
litellm.secret_manager_client = kms_client
litellm._key_management_system = KeyManagementSystem.AWS_KMS
except Exception as e:
raise e
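
A hedged sketch of the full AWS KMS flow this function enables: encrypt a value once with boto3, export the base64 ciphertext as an environment variable, register the KMS client at startup, and resolve it through `get_secret` (patched later in this diff). The key alias, variable names, and import paths outside the diff are placeholders.

```python
# Hedged sketch under stated assumptions; not the authoritative setup.
import base64
import os

import boto3
from litellm.proxy.secret_managers.aws_secret_manager import load_aws_kms  # assumed module path
from litellm.utils import get_secret

os.environ["AWS_REGION_NAME"] = "us-west-2"  # placeholder; AWS credentials must also be configured

# 1) One time: KMS-encrypt the secret and export the base64-encoded ciphertext.
kms = boto3.client("kms", region_name=os.environ["AWS_REGION_NAME"])
ciphertext = kms.encrypt(
    KeyId="alias/litellm-master-key",  # placeholder key alias
    Plaintext=b"sk-1234",
)["CiphertextBlob"]
os.environ["LITELLM_MASTER_KEY"] = base64.b64encode(ciphertext).decode()

# 2) At proxy startup: register the KMS client as the key management system.
load_aws_kms(use_aws_kms=True)

# 3) Resolve it; the "aws_kms/" prefix routes get_secret through the KMS decrypt
#    branch added later in this diff. How the prefix is stripped before the env
#    lookup is not shown here, so treat the call shape as an assumption.
master_key = get_secret("aws_kms/LITELLM_MASTER_KEY")
```
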


@ -2709,13 +2709,15 @@ def decrypt_value(value: bytes, master_key: str) -> str:
# LiteLLM Admin UI - Non SSO Login # LiteLLM Admin UI - Non SSO Login
html_form = """ url_to_redirect_to = os.getenv("PROXY_BASE_URL", "")
url_to_redirect_to += "/login"
html_form = f"""
<!DOCTYPE html> <!DOCTYPE html>
<html> <html>
<head> <head>
<title>LiteLLM Login</title> <title>LiteLLM Login</title>
<style> <style>
body { body {{
font-family: Arial, sans-serif; font-family: Arial, sans-serif;
background-color: #f4f4f4; background-color: #f4f4f4;
margin: 0; margin: 0;
@ -2724,42 +2726,42 @@ html_form = """
justify-content: center; justify-content: center;
align-items: center; align-items: center;
height: 100vh; height: 100vh;
} }}
form { form {{
background-color: #fff; background-color: #fff;
padding: 20px; padding: 20px;
border-radius: 8px; border-radius: 8px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1); box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
} }}
label { label {{
display: block; display: block;
margin-bottom: 8px; margin-bottom: 8px;
} }}
input { input {{
width: 100%; width: 100%;
padding: 8px; padding: 8px;
margin-bottom: 16px; margin-bottom: 16px;
box-sizing: border-box; box-sizing: border-box;
border: 1px solid #ccc; border: 1px solid #ccc;
border-radius: 4px; border-radius: 4px;
} }}
input[type="submit"] { input[type="submit"] {{
background-color: #4caf50; background-color: #4caf50;
color: #fff; color: #fff;
cursor: pointer; cursor: pointer;
} }}
input[type="submit"]:hover { input[type="submit"]:hover {{
background-color: #45a049; background-color: #45a049;
} }}
</style> </style>
</head> </head>
<body> <body>
<form action="/login" method="post"> <form action="{url_to_redirect_to}" method="post">
<h2>LiteLLM Login</h2> <h2>LiteLLM Login</h2>
<p>By default Username is "admin" and Password is your set LiteLLM Proxy `MASTER_KEY`</p> <p>By default Username is "admin" and Password is your set LiteLLM Proxy `MASTER_KEY`</p>
@ -2771,8 +2773,6 @@ html_form = """
<input type="password" id="password" name="password" required> <input type="password" id="password" name="password" required>
<input type="submit" value="Submit"> <input type="submit" value="Submit">
</form> </form>
</body>
</html>
""" """
@ -2837,3 +2837,17 @@ missing_keys_html_form = """
</body> </body>
</html> </html>
""" """
def get_error_message_str(e: Exception) -> str:
error_message = ""
if isinstance(e, HTTPException):
if isinstance(e.detail, str):
error_message = e.detail
elif isinstance(e.detail, dict):
error_message = json.dumps(e.detail)
else:
error_message = str(e)
else:
error_message = str(e)
return error_message
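
A short, hedged usage sketch for `get_error_message_str`, showing the three branches it distinguishes (string detail, dict detail, plain exception); the helper is assumed to be in scope from the module above.

```python
# Hedged sketch exercising get_error_message_str as defined above.
from fastapi import HTTPException

print(get_error_message_str(HTTPException(status_code=404, detail="key not found")))
# -> key not found
print(get_error_message_str(HTTPException(status_code=400, detail={"error": "bad team_id"})))
# -> {"error": "bad team_id"}
print(get_error_message_str(ValueError("something else went wrong")))
# -> something else went wrong
```
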


@ -220,8 +220,6 @@ class Router:
[] []
) # names of models under litellm_params. ex. azure/chatgpt-v-2 ) # names of models under litellm_params. ex. azure/chatgpt-v-2
self.deployment_latency_map = {} self.deployment_latency_map = {}
### SCHEDULER ###
self.scheduler = Scheduler(polling_interval=polling_interval)
### CACHING ### ### CACHING ###
cache_type: Literal["local", "redis"] = "local" # default to an in-memory cache cache_type: Literal["local", "redis"] = "local" # default to an in-memory cache
redis_cache = None redis_cache = None
@ -259,6 +257,10 @@ class Router:
redis_cache=redis_cache, in_memory_cache=InMemoryCache() redis_cache=redis_cache, in_memory_cache=InMemoryCache()
) # use a dual cache (Redis+In-Memory) for tracking cooldowns, usage, etc. ) # use a dual cache (Redis+In-Memory) for tracking cooldowns, usage, etc.
### SCHEDULER ###
self.scheduler = Scheduler(
polling_interval=polling_interval, redis_cache=redis_cache
)
self.default_deployment = None # use this to track the users default deployment, when they want to use model = * self.default_deployment = None # use this to track the users default deployment, when they want to use model = *
self.default_max_parallel_requests = default_max_parallel_requests self.default_max_parallel_requests = default_max_parallel_requests
@ -2096,8 +2098,8 @@ class Router:
except Exception as e: except Exception as e:
raise e raise e
except Exception as e: except Exception as e:
verbose_router_logger.debug(f"An exception occurred - {str(e)}") verbose_router_logger.error(f"An exception occurred - {str(e)}")
traceback.print_exc() verbose_router_logger.debug(traceback.format_exc())
raise original_exception raise original_exception
async def async_function_with_retries(self, *args, **kwargs): async def async_function_with_retries(self, *args, **kwargs):
@ -4048,6 +4050,12 @@ class Router:
for idx in reversed(invalid_model_indices): for idx in reversed(invalid_model_indices):
_returned_deployments.pop(idx) _returned_deployments.pop(idx)
## ORDER FILTERING ## -> if user set 'order' in deployments, return deployments with lowest order (e.g. order=1 > order=2)
if len(_returned_deployments) > 0:
_returned_deployments = litellm.utils._get_order_filtered_deployments(
_returned_deployments
)
return _returned_deployments return _returned_deployments
def _common_checks_available_deployment( def _common_checks_available_deployment(


@ -1,11 +1,9 @@
#### What this does #### #### What this does ####
# picks based on response time (for streaming, this is time to first token) # picks based on response time (for streaming, this is time to first token)
from pydantic import BaseModel, Extra, Field, root_validator from pydantic import BaseModel
import os, requests, random # type: ignore
from typing import Optional, Union, List, Dict from typing import Optional, Union, List, Dict
from datetime import datetime, timedelta from datetime import datetime, timedelta
import random from litellm import verbose_logger
import traceback import traceback
from litellm.caching import DualCache from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
@ -119,7 +117,12 @@ class LowestCostLoggingHandler(CustomLogger):
if self.test_flag: if self.test_flag:
self.logged_success += 1 self.logged_success += 1
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass pass
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@ -201,7 +204,12 @@ class LowestCostLoggingHandler(CustomLogger):
if self.test_flag: if self.test_flag:
self.logged_success += 1 self.logged_success += 1
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass pass
async def async_get_available_deployments( async def async_get_available_deployments(


@ -1,16 +1,16 @@
#### What this does #### #### What this does ####
# picks based on response time (for streaming, this is time to first token) # picks based on response time (for streaming, this is time to first token)
from pydantic import BaseModel, Extra, Field, root_validator # type: ignore from pydantic import BaseModel
import dotenv, os, requests, random # type: ignore import random
from typing import Optional, Union, List, Dict from typing import Optional, Union, List, Dict
from datetime import datetime, timedelta from datetime import datetime, timedelta
import random
import traceback import traceback
from litellm.caching import DualCache from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm import ModelResponse from litellm import ModelResponse
from litellm import token_counter from litellm import token_counter
import litellm import litellm
from litellm import verbose_logger
class LiteLLMBase(BaseModel): class LiteLLMBase(BaseModel):
@ -165,7 +165,12 @@ class LowestLatencyLoggingHandler(CustomLogger):
if self.test_flag: if self.test_flag:
self.logged_success += 1 self.logged_success += 1
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass pass
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
@ -229,7 +234,12 @@ class LowestLatencyLoggingHandler(CustomLogger):
# do nothing if it's not a timeout error # do nothing if it's not a timeout error
return return
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass pass
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@ -352,7 +362,12 @@ class LowestLatencyLoggingHandler(CustomLogger):
if self.test_flag: if self.test_flag:
self.logged_success += 1 self.logged_success += 1
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.router_strategy.lowest_latency.py::async_log_success_event(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass pass
def get_available_deployments( def get_available_deployments(


@ -11,6 +11,7 @@ from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_router_logger from litellm._logging import verbose_router_logger
from litellm.utils import print_verbose from litellm.utils import print_verbose
class LiteLLMBase(BaseModel): class LiteLLMBase(BaseModel):
""" """
Implements default functions, all pydantic objects should have. Implements default functions, all pydantic objects should have.
@ -23,16 +24,20 @@ class LiteLLMBase(BaseModel):
# if using pydantic v1 # if using pydantic v1
return self.dict() return self.dict()
class RoutingArgs(LiteLLMBase): class RoutingArgs(LiteLLMBase):
ttl: int = 1 * 60 # 1min (RPM/TPM expire key) ttl: int = 1 * 60 # 1min (RPM/TPM expire key)
class LowestTPMLoggingHandler(CustomLogger): class LowestTPMLoggingHandler(CustomLogger):
test_flag: bool = False test_flag: bool = False
logged_success: int = 0 logged_success: int = 0
logged_failure: int = 0 logged_failure: int = 0
default_cache_time_seconds: int = 1 * 60 * 60 # 1 hour default_cache_time_seconds: int = 1 * 60 * 60 # 1 hour
def __init__(self, router_cache: DualCache, model_list: list, routing_args: dict = {}): def __init__(
self, router_cache: DualCache, model_list: list, routing_args: dict = {}
):
self.router_cache = router_cache self.router_cache = router_cache
self.model_list = model_list self.model_list = model_list
self.routing_args = RoutingArgs(**routing_args) self.routing_args = RoutingArgs(**routing_args)
@ -72,19 +77,28 @@ class LowestTPMLoggingHandler(CustomLogger):
request_count_dict = self.router_cache.get_cache(key=tpm_key) or {} request_count_dict = self.router_cache.get_cache(key=tpm_key) or {}
request_count_dict[id] = request_count_dict.get(id, 0) + total_tokens request_count_dict[id] = request_count_dict.get(id, 0) + total_tokens
self.router_cache.set_cache(key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl) self.router_cache.set_cache(
key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl
)
## RPM ## RPM
request_count_dict = self.router_cache.get_cache(key=rpm_key) or {} request_count_dict = self.router_cache.get_cache(key=rpm_key) or {}
request_count_dict[id] = request_count_dict.get(id, 0) + 1 request_count_dict[id] = request_count_dict.get(id, 0) + 1
self.router_cache.set_cache(key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl) self.router_cache.set_cache(
key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl
)
### TESTING ### ### TESTING ###
if self.test_flag: if self.test_flag:
self.logged_success += 1 self.logged_success += 1
except Exception as e: except Exception as e:
traceback.print_exc() verbose_router_logger.error(
"litellm.router_strategy.lowest_tpm_rpm.py::async_log_success_event(): Exception occured - {}".format(
str(e)
)
)
verbose_router_logger.debug(traceback.format_exc())
pass pass
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@ -123,19 +137,28 @@ class LowestTPMLoggingHandler(CustomLogger):
request_count_dict = self.router_cache.get_cache(key=tpm_key) or {} request_count_dict = self.router_cache.get_cache(key=tpm_key) or {}
request_count_dict[id] = request_count_dict.get(id, 0) + total_tokens request_count_dict[id] = request_count_dict.get(id, 0) + total_tokens
self.router_cache.set_cache(key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl) self.router_cache.set_cache(
key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl
)
## RPM ## RPM
request_count_dict = self.router_cache.get_cache(key=rpm_key) or {} request_count_dict = self.router_cache.get_cache(key=rpm_key) or {}
request_count_dict[id] = request_count_dict.get(id, 0) + 1 request_count_dict[id] = request_count_dict.get(id, 0) + 1
self.router_cache.set_cache(key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl) self.router_cache.set_cache(
key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl
)
### TESTING ### ### TESTING ###
if self.test_flag: if self.test_flag:
self.logged_success += 1 self.logged_success += 1
except Exception as e: except Exception as e:
traceback.print_exc() verbose_router_logger.error(
"litellm.router_strategy.lowest_tpm_rpm.py::async_log_success_event(): Exception occured - {}".format(
str(e)
)
)
verbose_router_logger.debug(traceback.format_exc())
pass pass
def get_available_deployments( def get_available_deployments(


@ -1,19 +1,19 @@
#### What this does #### #### What this does ####
# identifies lowest tpm deployment # identifies lowest tpm deployment
from pydantic import BaseModel from pydantic import BaseModel
import dotenv, os, requests, random import random
from typing import Optional, Union, List, Dict from typing import Optional, Union, List, Dict
import datetime as datetime_og import traceback
from datetime import datetime import httpx
import traceback, asyncio, httpx
import litellm import litellm
from litellm import token_counter from litellm import token_counter
from litellm.caching import DualCache from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_router_logger from litellm._logging import verbose_router_logger, verbose_logger
from litellm.utils import print_verbose, get_utc_datetime from litellm.utils import print_verbose, get_utc_datetime
from litellm.types.router import RouterErrors from litellm.types.router import RouterErrors
class LiteLLMBase(BaseModel): class LiteLLMBase(BaseModel):
""" """
Implements default functions, all pydantic objects should have. Implements default functions, all pydantic objects should have.
@ -22,12 +22,14 @@ class LiteLLMBase(BaseModel):
def json(self, **kwargs): def json(self, **kwargs):
try: try:
return self.model_dump() # noqa return self.model_dump() # noqa
except: except Exception as e:
# if using pydantic v1 # if using pydantic v1
return self.dict() return self.dict()
class RoutingArgs(LiteLLMBase): class RoutingArgs(LiteLLMBase):
ttl: int = 1 * 60 # 1min (RPM/TPM expire key) ttl: int = 1 * 60 # 1min (RPM/TPM expire key)
class LowestTPMLoggingHandler_v2(CustomLogger): class LowestTPMLoggingHandler_v2(CustomLogger):
""" """
@ -47,7 +49,9 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
logged_failure: int = 0 logged_failure: int = 0
default_cache_time_seconds: int = 1 * 60 * 60 # 1 hour default_cache_time_seconds: int = 1 * 60 * 60 # 1 hour
def __init__(self, router_cache: DualCache, model_list: list, routing_args: dict = {}): def __init__(
self, router_cache: DualCache, model_list: list, routing_args: dict = {}
):
self.router_cache = router_cache self.router_cache = router_cache
self.model_list = model_list self.model_list = model_list
self.routing_args = RoutingArgs(**routing_args) self.routing_args = RoutingArgs(**routing_args)
@ -104,7 +108,9 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
) )
else: else:
# if local result below limit, check redis ## prevent unnecessary redis checks # if local result below limit, check redis ## prevent unnecessary redis checks
result = self.router_cache.increment_cache(key=rpm_key, value=1, ttl=self.routing_args.ttl) result = self.router_cache.increment_cache(
key=rpm_key, value=1, ttl=self.routing_args.ttl
)
if result is not None and result > deployment_rpm: if result is not None and result > deployment_rpm:
raise litellm.RateLimitError( raise litellm.RateLimitError(
message="Deployment over defined rpm limit={}. current usage={}".format( message="Deployment over defined rpm limit={}. current usage={}".format(
@ -244,12 +250,19 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
# update cache # update cache
## TPM ## TPM
self.router_cache.increment_cache(key=tpm_key, value=total_tokens, ttl=self.routing_args.ttl) self.router_cache.increment_cache(
key=tpm_key, value=total_tokens, ttl=self.routing_args.ttl
)
### TESTING ### ### TESTING ###
if self.test_flag: if self.test_flag:
self.logged_success += 1 self.logged_success += 1
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass pass
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@ -295,7 +308,12 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
if self.test_flag: if self.test_flag:
self.logged_success += 1 self.logged_success += 1
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass pass
def _common_checks_available_deployment( def _common_checks_available_deployment(


@ -1,13 +1,14 @@
import heapq, time import heapq
from pydantic import BaseModel from pydantic import BaseModel
from typing import Optional from typing import Optional
import enum import enum
from litellm.caching import DualCache from litellm.caching import DualCache, RedisCache
from litellm import print_verbose from litellm import print_verbose
class SchedulerCacheKeys(enum.Enum): class SchedulerCacheKeys(enum.Enum):
queue = "scheduler:queue" queue = "scheduler:queue"
default_in_memory_ttl = 5 # cache queue in-memory for 5s when redis cache available
class DefaultPriorities(enum.Enum): class DefaultPriorities(enum.Enum):
@ -25,18 +26,24 @@ class FlowItem(BaseModel):
class Scheduler: class Scheduler:
cache: DualCache cache: DualCache
def __init__(self, polling_interval: Optional[float] = None): def __init__(
self,
polling_interval: Optional[float] = None,
redis_cache: Optional[RedisCache] = None,
):
""" """
polling_interval: float or null - frequency of polling queue. Default is 3ms. polling_interval: float or null - frequency of polling queue. Default is 3ms.
""" """
self.queue: list = [] self.queue: list = []
self.cache = DualCache() default_in_memory_ttl: Optional[float] = None
if redis_cache is not None:
# if redis-cache available frequently poll that instead of using in-memory.
default_in_memory_ttl = SchedulerCacheKeys.default_in_memory_ttl.value
self.cache = DualCache(
redis_cache=redis_cache, default_in_memory_ttl=default_in_memory_ttl
)
self.polling_interval = polling_interval or 0.03 # default to 3ms self.polling_interval = polling_interval or 0.03 # default to 3ms
def update_variables(self, cache: Optional[DualCache] = None):
if cache is not None:
self.cache = cache
async def add_request(self, request: FlowItem): async def add_request(self, request: FlowItem):
# We use the priority directly, as lower values indicate higher priority # We use the priority directly, as lower values indicate higher priority
# get the queue # get the queue
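
A hedged sketch of the new Redis-backed scheduler wiring: handing a `RedisCache` to the `Scheduler` makes the request queue shared across proxy instances, with the in-memory layer kept only for the 5s TTL defined above. The `RedisCache` constructor arguments and the `litellm.scheduler` import path are assumptions.

```python
# Hedged sketch, not the authoritative wiring (the Router builds this internally
# from its own redis settings, as shown earlier in this diff).
import os

from litellm.caching import RedisCache
from litellm.scheduler import Scheduler  # assumed module path

redis_cache = RedisCache(
    host=os.getenv("REDIS_HOST", "localhost"),
    port=int(os.getenv("REDIS_PORT", "6379")),
    password=os.getenv("REDIS_PASSWORD"),
)

# With redis_cache set, Scheduler builds a DualCache whose in-memory copy expires
# after SchedulerCacheKeys.default_in_memory_ttl (5s), so polling mostly hits Redis.
scheduler = Scheduler(polling_interval=0.03, redis_cache=redis_cache)
```
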


@ -198,7 +198,11 @@ async def test_aarun_thread_litellm(sync_mode, provider, is_streaming):
) )
assert isinstance(messages.data[0], Message) assert isinstance(messages.data[0], Message)
else: else:
pytest.fail("An unexpected error occurred when running the thread") pytest.fail(
"An unexpected error occurred when running the thread, {}".format(
run
)
)
else: else:
added_message = await litellm.a_add_message(**data) added_message = await litellm.a_add_message(**data)
@ -226,4 +230,8 @@ async def test_aarun_thread_litellm(sync_mode, provider, is_streaming):
) )
assert isinstance(messages.data[0], Message) assert isinstance(messages.data[0], Message)
else: else:
pytest.fail("An unexpected error occurred when running the thread") pytest.fail(
"An unexpected error occurred when running the thread, {}".format(
run
)
)


@ -2169,6 +2169,7 @@ def test_completion_azure_key_completion_arg():
logprobs=True, logprobs=True,
max_tokens=10, max_tokens=10,
) )
print(f"response: {response}") print(f"response: {response}")
print("Hidden Params", response._hidden_params) print("Hidden Params", response._hidden_params)
@ -2544,6 +2545,8 @@ def test_replicate_custom_prompt_dict():
"content": "what is yc write 1 paragraph", "content": "what is yc write 1 paragraph",
} }
], ],
mock_response="Hello world",
mock_response="hello world",
repetition_penalty=0.1, repetition_penalty=0.1,
num_retries=3, num_retries=3,
) )


@ -76,7 +76,7 @@ def test_image_generation_azure_dall_e_3():
) )
print(f"response: {response}") print(f"response: {response}")
assert len(response.data) > 0 assert len(response.data) > 0
except litellm.RateLimitError as e: except litellm.InternalServerError as e:
pass pass
except litellm.ContentPolicyViolationError: except litellm.ContentPolicyViolationError:
pass # OpenAI randomly raises these errors - skip when they occur pass # OpenAI randomly raises these errors - skip when they occur
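
Because Azure "Internal server error" responses are now mapped to `litellm.InternalServerError` (see the `exception_type` change later in this diff), callers that want to tolerate those transient 500s can catch the new type; a hedged sketch with a placeholder deployment name:

```python
# Hedged sketch mirroring the updated test above; the model name is a placeholder
# and the required Azure credentials/env vars are assumed to be configured.
import litellm

try:
    response = litellm.image_generation(
        model="azure/dall-e-3-test",
        prompt="A cute baby sea otter",
    )
    print(response.data[0])
except litellm.InternalServerError:
    pass  # Azure intermittently returns 500s; skip instead of failing
except litellm.ContentPolicyViolationError:
    pass  # the provider raises these sporadically for benign prompts
```
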


@ -102,18 +102,18 @@ async def test_get_available_deployments_custom_price():
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_lowest_cost_routing(): async def test_lowest_cost_routing():
""" """
Test if router returns model with the lowest cost Test if router, returns model with the lowest cost
""" """
model_list = [ model_list = [
{ {
"model_name": "gpt-3.5-turbo", "model_name": "gpt-4",
"litellm_params": {"model": "gpt-4"}, "litellm_params": {"model": "gpt-4"},
"model_info": {"id": "openai-gpt-4"}, "model_info": {"id": "openai-gpt-4"},
}, },
{ {
"model_name": "gpt-3.5-turbo", "model_name": "gpt-3.5-turbo",
"litellm_params": {"model": "groq/llama3-8b-8192"}, "litellm_params": {"model": "gpt-3.5-turbo"},
"model_info": {"id": "groq-llama"}, "model_info": {"id": "gpt-3.5-turbo"},
}, },
] ]
@ -127,7 +127,7 @@ async def test_lowest_cost_routing():
print( print(
response._hidden_params["model_id"] response._hidden_params["model_id"]
) # expect groq-llama, since groq/llama has lowest cost ) # expect groq-llama, since groq/llama has lowest cost
assert "groq-llama" == response._hidden_params["model_id"] assert "gpt-3.5-turbo" == response._hidden_params["model_id"]
async def _deploy(lowest_cost_logger, deployment_id, tokens_used, duration): async def _deploy(lowest_cost_logger, deployment_id, tokens_used, duration):


@ -38,6 +38,48 @@ def test_router_sensitive_keys():
assert "special-key" not in str(e) assert "special-key" not in str(e)
def test_router_order():
"""
Asserts for 2 models in a model group, model with order=1 always called first
"""
router = Router(
model_list=[
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-4o",
"api_key": os.getenv("OPENAI_API_KEY"),
"mock_response": "Hello world",
"order": 1,
},
"model_info": {"id": "1"},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-4o",
"api_key": "bad-key",
"mock_response": Exception("this is a bad key"),
"order": 2,
},
"model_info": {"id": "2"},
},
],
num_retries=0,
allowed_fails=0,
enable_pre_call_checks=True,
)
for _ in range(100):
response = router.completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
assert isinstance(response, litellm.ModelResponse)
assert response._hidden_params["model_id"] == "1"
@pytest.mark.parametrize("num_retries", [None, 2]) @pytest.mark.parametrize("num_retries", [None, 2])
@pytest.mark.parametrize("max_retries", [None, 4]) @pytest.mark.parametrize("max_retries", [None, 4])
def test_router_num_retries_init(num_retries, max_retries): def test_router_num_retries_init(num_retries, max_retries):


@ -186,3 +186,13 @@ def test_load_test_token_counter(model):
total_time = end_time - start_time total_time = end_time - start_time
print("model={}, total test time={}".format(model, total_time)) print("model={}, total test time={}".format(model, total_time))
assert total_time < 10, f"Total encoding time > 10s, {total_time}" assert total_time < 10, f"Total encoding time > 10s, {total_time}"
def test_openai_token_with_image_and_text():
model = "gpt-4o"
full_request = {'model': 'gpt-4o', 'tools': [{'type': 'function', 'function': {'name': 'json', 'parameters': {'type': 'object', 'required': ['clause'], 'properties': {'clause': {'type': 'string'}}}, 'description': 'Respond with a JSON object.'}}], 'logprobs': False, 'messages': [{'role': 'user', 'content': [{'text': '\n Just some long text, long long text, and you know it will be longer than 7 tokens definitely.', 'type': 'text'}]}], 'tool_choice': {'type': 'function', 'function': {'name': 'json'}}, 'exclude_models': [], 'disable_fallback': False, 'exclude_providers': []}
messages = full_request.get("messages", [])
token_count = token_counter(model=model, messages=messages)
print(token_count)
test_openai_token_with_image_and_text()


@ -1374,8 +1374,12 @@ class Logging:
callback_func=callback, callback_func=callback,
) )
except Exception as e: except Exception as e:
traceback.print_exc() verbose_logger.error(
print_verbose( "litellm.Logging.pre_call(): Exception occurred - {}".format(
str(e)
)
)
verbose_logger.debug(
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}" f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}"
) )
print_verbose( print_verbose(
@ -4062,6 +4066,7 @@ def openai_token_counter(
for c in value: for c in value:
if c["type"] == "text": if c["type"] == "text":
text += c["text"] text += c["text"]
num_tokens += len(encoding.encode(c["text"], disallowed_special=()))
elif c["type"] == "image_url": elif c["type"] == "image_url":
if isinstance(c["image_url"], dict): if isinstance(c["image_url"], dict):
image_url_dict = c["image_url"] image_url_dict = c["image_url"]
@ -6194,6 +6199,27 @@ def calculate_max_parallel_requests(
return None return None
def _get_order_filtered_deployments(healthy_deployments: List[Dict]) -> List:
min_order = min(
(
deployment["litellm_params"]["order"]
for deployment in healthy_deployments
if "order" in deployment["litellm_params"]
),
default=None,
)
if min_order is not None:
filtered_deployments = [
deployment
for deployment in healthy_deployments
if deployment["litellm_params"].get("order") == min_order
]
return filtered_deployments
return healthy_deployments
def _get_model_region( def _get_model_region(
custom_llm_provider: str, litellm_params: LiteLLM_Params custom_llm_provider: str, litellm_params: LiteLLM_Params
) -> Optional[str]: ) -> Optional[str]:
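
To make the new order filter concrete, a small sketch of `_get_order_filtered_deployments` (defined just above): once any deployment sets `order`, only deployments with the lowest order value survive, and deployments without `order` are dropped as well.

```python
# Sketch of the helper defined above; referenced by the Router as
# litellm.utils._get_order_filtered_deployments earlier in this diff.
from litellm.utils import _get_order_filtered_deployments

healthy_deployments = [
    {"model_info": {"id": "1"}, "litellm_params": {"model": "gpt-4o", "order": 1}},
    {"model_info": {"id": "2"}, "litellm_params": {"model": "gpt-4o", "order": 2}},
    {"model_info": {"id": "3"}, "litellm_params": {"model": "gpt-4o"}},  # no order set
]

filtered = _get_order_filtered_deployments(healthy_deployments)
# Only the order=1 deployment remains; id "3" is dropped because another
# deployment in the group defines an order.
assert [d["model_info"]["id"] for d in filtered] == ["1"]
```
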
@ -7336,6 +7362,10 @@ def get_provider_fields(custom_llm_provider: str) -> List[ProviderField]:
if custom_llm_provider == "databricks": if custom_llm_provider == "databricks":
return litellm.DatabricksConfig().get_required_params() return litellm.DatabricksConfig().get_required_params()
elif custom_llm_provider == "ollama":
return litellm.OllamaConfig().get_required_params()
else: else:
return [] return []
@ -9782,8 +9812,7 @@ def exception_type(
elif custom_llm_provider == "azure": elif custom_llm_provider == "azure":
if "Internal server error" in error_str: if "Internal server error" in error_str:
exception_mapping_worked = True exception_mapping_worked = True
raise APIError( raise litellm.InternalServerError(
status_code=500,
message=f"AzureException Internal server error - {original_exception.message}", message=f"AzureException Internal server error - {original_exception.message}",
llm_provider="azure", llm_provider="azure",
model=model, model=model,
@ -10033,6 +10062,8 @@ def get_secret(
): ):
key_management_system = litellm._key_management_system key_management_system = litellm._key_management_system
key_management_settings = litellm._key_management_settings key_management_settings = litellm._key_management_settings
args = locals()
if secret_name.startswith("os.environ/"): if secret_name.startswith("os.environ/"):
secret_name = secret_name.replace("os.environ/", "") secret_name = secret_name.replace("os.environ/", "")
@ -10120,13 +10151,13 @@ def get_secret(
key_manager = "local" key_manager = "local"
if ( if (
key_manager == KeyManagementSystem.AZURE_KEY_VAULT key_manager == KeyManagementSystem.AZURE_KEY_VAULT.value
or type(client).__module__ + "." + type(client).__name__ or type(client).__module__ + "." + type(client).__name__
== "azure.keyvault.secrets._client.SecretClient" == "azure.keyvault.secrets._client.SecretClient"
): # support Azure Secret Client - from azure.keyvault.secrets import SecretClient ): # support Azure Secret Client - from azure.keyvault.secrets import SecretClient
secret = client.get_secret(secret_name).value secret = client.get_secret(secret_name).value
elif ( elif (
key_manager == KeyManagementSystem.GOOGLE_KMS key_manager == KeyManagementSystem.GOOGLE_KMS.value
or client.__class__.__name__ == "KeyManagementServiceClient" or client.__class__.__name__ == "KeyManagementServiceClient"
): ):
encrypted_secret: Any = os.getenv(secret_name) encrypted_secret: Any = os.getenv(secret_name)
@ -10154,6 +10185,25 @@ def get_secret(
secret = response.plaintext.decode( secret = response.plaintext.decode(
"utf-8" "utf-8"
) # assumes the original value was encoded with utf-8 ) # assumes the original value was encoded with utf-8
elif key_manager == KeyManagementSystem.AWS_KMS.value:
"""
Only check the tokens which start with 'aws_kms/'. This prevents latency impact caused by checking all keys.
"""
encrypted_value = os.getenv(secret_name, None)
if encrypted_value is None:
raise Exception("encrypted value for AWS KMS cannot be None.")
# Decode the base64 encoded ciphertext
ciphertext_blob = base64.b64decode(encrypted_value)
# Set up the parameters for the decrypt call
params = {"CiphertextBlob": ciphertext_blob}
# Perform the decryption
response = client.decrypt(**params)
# Extract and decode the plaintext
plaintext = response["Plaintext"]
secret = plaintext.decode("utf-8")
elif key_manager == KeyManagementSystem.AWS_SECRET_MANAGER.value: elif key_manager == KeyManagementSystem.AWS_SECRET_MANAGER.value:
try: try:
get_secret_value_response = client.get_secret_value( get_secret_value_response = client.get_secret_value(
@@ -10174,10 +10224,14 @@ def get_secret(
                 for k, v in secret_dict.items():
                     secret = v
                 print_verbose(f"secret: {secret}")
+        elif key_manager == "local":
+            secret = os.getenv(secret_name)
         else:  # assume the default is infisicial client
             secret = client.get_secret(secret_name).secret_value
     except Exception as e:  # check if it's in os.environ
-        print_verbose(f"An exception occurred - {str(e)}")
+        verbose_logger.error(
+            f"An exception occurred - {str(e)}\n\n{traceback.format_exc()}"
+        )
         secret = os.getenv(secret_name)
     try:
         secret_value_as_bool = ast.literal_eval(secret)
@@ -10511,7 +10565,12 @@ class CustomStreamWrapper:
                 "finish_reason": finish_reason,
             }
         except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.CustomStreamWrapper.handle_predibase_chunk(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
             raise e

     def handle_huggingface_chunk(self, chunk):
@@ -10555,7 +10614,12 @@ class CustomStreamWrapper:
                 "finish_reason": finish_reason,
             }
         except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.CustomStreamWrapper.handle_huggingface_chunk(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
             raise e

     def handle_ai21_chunk(self, chunk):  # fake streaming
@@ -10790,7 +10854,12 @@ class CustomStreamWrapper:
                 "usage": usage,
             }
         except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.CustomStreamWrapper.handle_openai_chat_completion_chunk(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
             raise e

     def handle_azure_text_completion_chunk(self, chunk):
@@ -10871,7 +10940,12 @@ class CustomStreamWrapper:
             else:
                 return ""
         except:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.CustomStreamWrapper.handle_baseten_chunk(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
             return ""

     def handle_cloudlfare_stream(self, chunk):
@@ -11070,7 +11144,12 @@ class CustomStreamWrapper:
                 "is_finished": True,
             }
         except:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.CustomStreamWrapper.handle_clarifai_chunk(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
             return ""

     def model_response_creator(self):
@@ -11557,7 +11636,12 @@ class CustomStreamWrapper:
                             tool["type"] = "function"
                     model_response.choices[0].delta = Delta(**_json_delta)
                 except Exception as e:
-                    traceback.print_exc()
+                    verbose_logger.error(
+                        "litellm.CustomStreamWrapper.chunk_creator(): Exception occured - {}".format(
+                            str(e)
+                        )
+                    )
+                    verbose_logger.debug(traceback.format_exc())
                     model_response.choices[0].delta = Delta()
             else:
                 try:
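All of the `CustomStreamWrapper` handlers above now log the exception message at ERROR level and push the full traceback to DEBUG via `verbose_logger`. A sketch of surfacing those DEBUG tracebacks from application code; the `"LiteLLM"` logger name is an assumption about litellm's internal logger, not something stated in this diff:

```python
# Sketch: surfacing the new DEBUG tracebacks from CustomStreamWrapper.
# Assumes litellm's verbose_logger is a standard logger named "LiteLLM".
import logging

import litellm

logging.basicConfig(level=logging.DEBUG)
logging.getLogger("LiteLLM").setLevel(logging.DEBUG)

litellm.set_verbose = True  # also enables litellm's own verbose output

for chunk in litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    stream=True,
):
    print(chunk)
```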

poetry.lock (generated)

@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.

 [[package]]
 name = "aiohttp"
@@ -2114,6 +2114,7 @@ files = [
     {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
     {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
     {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
+    {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
     {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
     {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
     {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
@@ -2121,8 +2122,15 @@ files = [
     {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
     {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
     {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
+    {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
     {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
     {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
+    {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
+    {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
+    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
+    {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
+    {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
+    {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
     {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
     {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
     {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
@@ -2139,6 +2147,7 @@ files = [
     {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
     {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
     {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
+    {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
     {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
     {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
     {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
@@ -2146,6 +2155,7 @@ files = [
     {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
     {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
     {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
+    {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
     {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
     {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
     {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
@@ -3140,4 +3150,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<4.0, !=3.9.7"
-content-hash = "a54d969a1a707413e7cd3ce869d14ef73dd41bb9d36ebf0fb878d9e929bc15b3"
+content-hash = "6a37992b63b11d254f5f40687bd96898b1d9515728f663f30dcc81c4ef8df7b7"

pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.40.3"
+version = "1.40.5"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -62,7 +62,8 @@ extra_proxy = [
     "azure-identity",
     "azure-keyvault-secrets",
     "google-cloud-kms",
-    "resend"
+    "resend",
+    "pynacl"
 ]

 [tool.poetry.scripts]
@@ -79,7 +80,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"

 [tool.commitizen]
-version = "1.40.3"
+version = "1.40.5"
 version_files = [
     "pyproject.toml:^version"
 ]

schema.prisma

@@ -243,4 +243,16 @@ model LiteLLM_InvitationLink {
   liteLLM_user_table_user    LiteLLM_UserTable @relation("UserId", fields: [user_id], references: [user_id])
   liteLLM_user_table_created LiteLLM_UserTable @relation("CreatedBy", fields: [created_by], references: [user_id])
   liteLLM_user_table_updated LiteLLM_UserTable @relation("UpdatedBy", fields: [updated_by], references: [user_id])
+}
+
+model LiteLLM_AuditLog {
+  id             String   @id @default(uuid())
+  updated_at     DateTime @default(now())
+  changed_by     String   // user or system that performed the action
+  action         String   // create, update, delete
+  table_name     String   // on of LitellmTableNames.TEAM_TABLE_NAME, LitellmTableNames.USER_TABLE_NAME, LitellmTableNames.PROXY_MODEL_TABLE_NAME,
+  object_id      String   // id of the object being audited. This can be the key id, team id, user id, model id
+  before_value   Json?    // value of the row
+  updated_values Json?    // value of the row after change
 }
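The new `LiteLLM_AuditLog` table records who changed what in the proxy database. A hedged sketch of writing one row with prisma-client-py (which the proxy uses for this schema); the lowercase `litellm_auditlog` accessor and all field values below are assumptions for illustration, not taken from this diff:

```python
# Sketch: inserting an audit-log row with prisma-client-py.
# Assumes the generated client exposes the model as `db.litellm_auditlog`.
import asyncio

from prisma import Json, Prisma


async def main() -> None:
    db = Prisma()
    await db.connect()
    await db.litellm_auditlog.create(
        data={
            "changed_by": "admin-ui",           # user or system that performed the action
            "action": "update",
            "table_name": "LiteLLM_TeamTable",  # illustrative table name
            "object_id": "team-1234",
            "before_value": Json({"max_budget": 10}),
            "updated_values": Json({"max_budget": 25}),
        }
    )
    await db.disconnect()


asyncio.run(main())
```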

(litellm dashboard UI: model dashboard component)

@@ -145,6 +145,7 @@ enum Providers {
   OpenAI_Compatible = "OpenAI-Compatible Endpoints (Groq, Together AI, Mistral AI, etc.)",
   Vertex_AI = "Vertex AI (Anthropic, Gemini, etc.)",
   Databricks = "Databricks",
+  Ollama = "Ollama",
 }

 const provider_map: Record<string, string> = {
@@ -156,6 +157,7 @@ const provider_map: Record<string, string> = {
   OpenAI_Compatible: "openai",
   Vertex_AI: "vertex_ai",
   Databricks: "databricks",
+  Ollama: "ollama",
 };

 const retry_policy_map: Record<string, string> = {
@@ -1747,6 +1749,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
             )}
             {selectedProvider != Providers.Bedrock &&
               selectedProvider != Providers.Vertex_AI &&
+              selectedProvider != Providers.Ollama &&
               (dynamicProviderForm === undefined ||
                 dynamicProviderForm.fields.length == 0) && (
                 <Form.Item