Merge branch 'main' into litellm_bedrock_converse_api

This commit is contained in:
Krish Dholakia 2024-06-07 08:49:52 -07:00 committed by GitHub
commit 26993c067e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
82 changed files with 2540 additions and 1147 deletions

View file

@ -344,4 +344,4 @@ workflows:
filters:
branches:
only:
- main
- main

View file

@ -225,37 +225,37 @@ curl 'http://0.0.0.0:4000/key/generate' \
## Supported Providers ([Docs](https://docs.litellm.ai/docs/providers))
| Provider | [Completion](https://docs.litellm.ai/docs/#basic-usage) | [Streaming](https://docs.litellm.ai/docs/completion/stream#streaming-responses) | [Async Completion](https://docs.litellm.ai/docs/completion/stream#async-completion) | [Async Streaming](https://docs.litellm.ai/docs/completion/stream#async-streaming) | [Async Embedding](https://docs.litellm.ai/docs/embedding/supported_embedding) | [Async Image Generation](https://docs.litellm.ai/docs/image_generation) |
| ----------------------------------------------------------------------------------- | ------------------------------------------------------- | ------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | --------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------- |
| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [google - vertex_ai](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅
| [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ |
| [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | ✅ | |
| [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ |
| [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ |
| [huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ |
| [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | ✅ |
| [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | ✅ |
| [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ |
| [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ |
| [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ |
| [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ |
| [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ |
| [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ |
| [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ |
| [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ |
| [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ |
| [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ |
| [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ |
| [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅
| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ |
| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ |
|-------------------------------------------------------------------------------------|---------------------------------------------------------|---------------------------------------------------------------------------------|-------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------|-------------------------------------------------------------------------------|-------------------------------------------------------------------------|
| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [google - vertex_ai](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ | | |
| [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | | | |
| [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ | | |
| [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ | | |
| [huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ | | |
| [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | ✅ | | |
| [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | ✅ | | |
| [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ | | |
| [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ | | |
| [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ | | |
| [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ | | |
| [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ | | |
| [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ | | |
| [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ | | |
| [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ | | |
| [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ | | |
| [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ | | |
| [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ | | |
| [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ | |
| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ | |
[**Read the Docs**](https://docs.litellm.ai/docs/)

View file

@ -10,6 +10,7 @@ For companies that need SSO, user management and professional support for LiteLL
This covers:
- ✅ **Features under the [LiteLLM Commercial License (Content Mod, Custom Tags, etc.)](https://docs.litellm.ai/docs/proxy/enterprise)**
- ✅ [**Secure UI access with Single Sign-On**](../docs/proxy/ui.md#setup-ssoauth-for-ui)
- ✅ [**Audit Logs with retention policy**](../docs/proxy/enterprise.md#audit-logs)
- ✅ [**JWT-Auth**](../docs/proxy/token_auth.md)
- ✅ [**Prompt Injection Detection**](#prompt-injection-detection-lakeraai)
- ✅ [**Invite Team Members to access `/spend` Routes**](../docs/proxy/cost_tracking#allowing-non-proxy-admins-to-access-spend-endpoints)

View file

@ -38,7 +38,7 @@ class MyCustomHandler(CustomLogger):
print(f"On Async Success")
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
print(f"On Async Success")
print(f"On Async Failure")
customHandler = MyCustomHandler()

View file

@ -144,6 +144,26 @@ print(response)
```
You can also pass `metadata` as part of the request header with a `langfuse_*` prefix:
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--header 'langfuse_trace_id: trace-id22' \
--header 'langfuse_trace_user_id: user-id2' \
--header 'langfuse_trace_metadata: {"key":"value"}' \
--data '{
    "model": "gpt-3.5-turbo",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ]
}'
```
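If you call the proxy through the OpenAI Python SDK instead of curl, the same `langfuse_*` headers can be attached with the client's `extra_headers` parameter. A minimal sketch mirroring the curl example above (the proxy URL and API key are placeholders):
```python
import openai

client = openai.OpenAI(api_key="anything", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what llm are you"}],
    # headers prefixed with `langfuse_` are forwarded to Langfuse by the proxy
    extra_headers={
        "langfuse_trace_id": "trace-id22",
        "langfuse_trace_user_id": "user-id2",
        "langfuse_trace_metadata": '{"key":"value"}',
    },
)
print(response)
```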
### Trace & Generation Parameters
#### Trace Specific Parameters

View file

@ -0,0 +1,3 @@
llmcord.py lets you and your friends chat with LLMs directly in your Discord server. It works with practically any LLM, remote or locally hosted.
GitHub: https://github.com/jakobdylanc/discord-llm-chatbot

View file

@ -46,13 +46,13 @@ for chunk in response:
## Supported Models - ALL Groq Models Supported!
We support ALL Groq models; just set `groq/` as a prefix when sending completion requests
| Model Name | Function Call |
|--------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| llama3-8b-8192 | `completion(model="groq/llama3-8b-8192", messages)` |
| llama3-70b-8192 | `completion(model="groq/llama3-70b-8192", messages)` |
| llama2-70b-4096 | `completion(model="groq/llama2-70b-4096", messages)` |
| Model Name | Function Call |
|--------------------|---------------------------------------------------------|
| llama3-8b-8192 | `completion(model="groq/llama3-8b-8192", messages)` |
| llama3-70b-8192 | `completion(model="groq/llama3-70b-8192", messages)` |
| llama2-70b-4096 | `completion(model="groq/llama2-70b-4096", messages)` |
| mixtral-8x7b-32768 | `completion(model="groq/mixtral-8x7b-32768", messages)` |
| gemma-7b-it | `completion(model="groq/gemma-7b-it", messages)` |
| gemma-7b-it | `completion(model="groq/gemma-7b-it", messages)` |
## Groq - Tool / Function Calling Example

View file

@ -26,52 +26,52 @@ Example TogetherAI Usage - Note: liteLLM supports all models deployed on Togethe
### Llama LLMs - Chat
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
| togethercomputer/llama-2-70b-chat | `completion('together_ai/togethercomputer/llama-2-70b-chat', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|-------------------------------------------------------------------------|------------------------------------|
| togethercomputer/llama-2-70b-chat | `completion('together_ai/togethercomputer/llama-2-70b-chat', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Llama LLMs - Language / Instruct
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
| togethercomputer/llama-2-70b | `completion('together_ai/togethercomputer/llama-2-70b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/LLaMA-2-7B-32K | `completion('together_ai/togethercomputer/LLaMA-2-7B-32K', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/Llama-2-7B-32K-Instruct | `completion('together_ai/togethercomputer/Llama-2-7B-32K-Instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/llama-2-7b | `completion('together_ai/togethercomputer/llama-2-7b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Model Name | Function Call | Required OS Variables |
|------------------------------------------|--------------------------------------------------------------------------------|------------------------------------|
| togethercomputer/llama-2-70b | `completion('together_ai/togethercomputer/llama-2-70b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/LLaMA-2-7B-32K | `completion('together_ai/togethercomputer/LLaMA-2-7B-32K', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/Llama-2-7B-32K-Instruct | `completion('together_ai/togethercomputer/Llama-2-7B-32K-Instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/llama-2-7b | `completion('together_ai/togethercomputer/llama-2-7b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Falcon LLMs
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
| togethercomputer/falcon-40b-instruct | `completion('together_ai/togethercomputer/falcon-40b-instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/falcon-7b-instruct | `completion('together_ai/togethercomputer/falcon-7b-instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Model Name | Function Call | Required OS Variables |
|--------------------------------------|----------------------------------------------------------------------------|------------------------------------|
| togethercomputer/falcon-40b-instruct | `completion('together_ai/togethercomputer/falcon-40b-instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/falcon-7b-instruct | `completion('together_ai/togethercomputer/falcon-7b-instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Alpaca LLMs
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
| togethercomputer/alpaca-7b | `completion('together_ai/togethercomputer/alpaca-7b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Model Name | Function Call | Required OS Variables |
|----------------------------|------------------------------------------------------------------|------------------------------------|
| togethercomputer/alpaca-7b | `completion('together_ai/togethercomputer/alpaca-7b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Other Chat LLMs
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
| HuggingFaceH4/starchat-alpha | `completion('together_ai/HuggingFaceH4/starchat-alpha', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Model Name | Function Call | Required OS Variables |
|------------------------------|--------------------------------------------------------------------|------------------------------------|
| HuggingFaceH4/starchat-alpha | `completion('together_ai/HuggingFaceH4/starchat-alpha', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Code LLMs
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
| togethercomputer/CodeLlama-34b | `completion('together_ai/togethercomputer/CodeLlama-34b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/CodeLlama-34b-Instruct | `completion('together_ai/togethercomputer/CodeLlama-34b-Instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/CodeLlama-34b-Python | `completion('together_ai/togethercomputer/CodeLlama-34b-Python', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| defog/sqlcoder | `completion('together_ai/defog/sqlcoder', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| NumbersStation/nsql-llama-2-7B | `completion('together_ai/NumbersStation/nsql-llama-2-7B', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| WizardLM/WizardCoder-15B-V1.0 | `completion('together_ai/WizardLM/WizardCoder-15B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| WizardLM/WizardCoder-Python-34B-V1.0 | `completion('together_ai/WizardLM/WizardCoder-Python-34B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Model Name | Function Call | Required OS Variables |
|-----------------------------------------|-------------------------------------------------------------------------------|------------------------------------|
| togethercomputer/CodeLlama-34b | `completion('together_ai/togethercomputer/CodeLlama-34b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/CodeLlama-34b-Instruct | `completion('together_ai/togethercomputer/CodeLlama-34b-Instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/CodeLlama-34b-Python | `completion('together_ai/togethercomputer/CodeLlama-34b-Python', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| defog/sqlcoder | `completion('together_ai/defog/sqlcoder', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| NumbersStation/nsql-llama-2-7B | `completion('together_ai/NumbersStation/nsql-llama-2-7B', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| WizardLM/WizardCoder-15B-V1.0 | `completion('together_ai/WizardLM/WizardCoder-15B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| WizardLM/WizardCoder-Python-34B-V1.0 | `completion('together_ai/WizardLM/WizardCoder-Python-34B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Language LLMs
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
| NousResearch/Nous-Hermes-Llama2-13b | `completion('together_ai/NousResearch/Nous-Hermes-Llama2-13b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Austism/chronos-hermes-13b | `completion('together_ai/Austism/chronos-hermes-13b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| upstage/SOLAR-0-70b-16bit | `completion('together_ai/upstage/SOLAR-0-70b-16bit', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| WizardLM/WizardLM-70B-V1.0 | `completion('together_ai/WizardLM/WizardLM-70B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Model Name | Function Call | Required OS Variables |
|-------------------------------------|---------------------------------------------------------------------------|------------------------------------|
| NousResearch/Nous-Hermes-Llama2-13b | `completion('together_ai/NousResearch/Nous-Hermes-Llama2-13b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Austism/chronos-hermes-13b | `completion('together_ai/Austism/chronos-hermes-13b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| upstage/SOLAR-0-70b-16bit | `completion('together_ai/upstage/SOLAR-0-70b-16bit', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| WizardLM/WizardLM-70B-V1.0 | `completion('together_ai/WizardLM/WizardLM-70B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
## Prompt Templates

View file

@ -155,14 +155,14 @@ def default_pt(messages):
#### Models we already have Prompt Templates for
| Model Name | Works for Models | Function Call |
| -------- | -------- | -------- |
| meta-llama/Llama-2-7b-chat | All meta-llama llama2 chat models| `completion(model='vllm/meta-llama/Llama-2-7b', messages=messages, api_base="your_api_endpoint")` |
| tiiuae/falcon-7b-instruct | All falcon instruct models | `completion(model='vllm/tiiuae/falcon-7b-instruct', messages=messages, api_base="your_api_endpoint")` |
| mosaicml/mpt-7b-chat | All mpt chat models | `completion(model='vllm/mosaicml/mpt-7b-chat', messages=messages, api_base="your_api_endpoint")` |
| codellama/CodeLlama-34b-Instruct-hf | All codellama instruct models | `completion(model='vllm/codellama/CodeLlama-34b-Instruct-hf', messages=messages, api_base="your_api_endpoint")` |
| WizardLM/WizardCoder-Python-34B-V1.0 | All wizardcoder models | `completion(model='vllm/WizardLM/WizardCoder-Python-34B-V1.0', messages=messages, api_base="your_api_endpoint")` |
| Phind/Phind-CodeLlama-34B-v2 | All phind-codellama models | `completion(model='vllm/Phind/Phind-CodeLlama-34B-v2', messages=messages, api_base="your_api_endpoint")` |
| Model Name | Works for Models | Function Call |
|--------------------------------------|-----------------------------------|------------------------------------------------------------------------------------------------------------------|
| meta-llama/Llama-2-7b-chat | All meta-llama llama2 chat models | `completion(model='vllm/meta-llama/Llama-2-7b', messages=messages, api_base="your_api_endpoint")` |
| tiiuae/falcon-7b-instruct | All falcon instruct models | `completion(model='vllm/tiiuae/falcon-7b-instruct', messages=messages, api_base="your_api_endpoint")` |
| mosaicml/mpt-7b-chat | All mpt chat models | `completion(model='vllm/mosaicml/mpt-7b-chat', messages=messages, api_base="your_api_endpoint")` |
| codellama/CodeLlama-34b-Instruct-hf | All codellama instruct models | `completion(model='vllm/codellama/CodeLlama-34b-Instruct-hf', messages=messages, api_base="your_api_endpoint")` |
| WizardLM/WizardCoder-Python-34B-V1.0 | All wizardcoder models | `completion(model='vllm/WizardLM/WizardCoder-Python-34B-V1.0', messages=messages, api_base="your_api_endpoint")` |
| Phind/Phind-CodeLlama-34B-v2 | All phind-codellama models | `completion(model='vllm/Phind/Phind-CodeLlama-34B-v2', messages=messages, api_base="your_api_endpoint")` |
#### Custom prompt templates

View file

@ -251,23 +251,23 @@ response = completion(
Here are some examples of models available in IBM watsonx.ai that you can use with LiteLLM:
| Model Name | Command |
| ---------- | --------- |
| Flan T5 XXL | `completion(model="watsonx/google/flan-t5-xxl", messages=messages)` |
| Flan Ul2 | `completion(model="watsonx/google/flan-ul2", messages=messages)` |
| Mt0 XXL | `completion(model="watsonx/bigscience/mt0-xxl", messages=messages)` |
| Gpt Neox | `completion(model="watsonx/eleutherai/gpt-neox-20b", messages=messages)` |
| Mpt 7B Instruct2 | `completion(model="watsonx/ibm/mpt-7b-instruct2", messages=messages)` |
| Starcoder | `completion(model="watsonx/bigcode/starcoder", messages=messages)` |
| Llama 2 70B Chat | `completion(model="watsonx/meta-llama/llama-2-70b-chat", messages=messages)` |
| Llama 2 13B Chat | `completion(model="watsonx/meta-llama/llama-2-13b-chat", messages=messages)` |
| Granite 13B Instruct | `completion(model="watsonx/ibm/granite-13b-instruct-v1", messages=messages)` |
| Granite 13B Chat | `completion(model="watsonx/ibm/granite-13b-chat-v1", messages=messages)` |
| Flan T5 XL | `completion(model="watsonx/google/flan-t5-xl", messages=messages)` |
| Granite 13B Chat V2 | `completion(model="watsonx/ibm/granite-13b-chat-v2", messages=messages)` |
| Granite 13B Instruct V2 | `completion(model="watsonx/ibm/granite-13b-instruct-v2", messages=messages)` |
| Elyza Japanese Llama 2 7B Instruct | `completion(model="watsonx/elyza/elyza-japanese-llama-2-7b-instruct", messages=messages)` |
| Mixtral 8X7B Instruct V01 Q | `completion(model="watsonx/ibm-mistralai/mixtral-8x7b-instruct-v01-q", messages=messages)` |
| Model Name                         | Command                                                                                    |
|------------------------------------|--------------------------------------------------------------------------------------------|
| Flan T5 XXL                        | `completion(model="watsonx/google/flan-t5-xxl", messages=messages)`                        |
| Flan Ul2                           | `completion(model="watsonx/google/flan-ul2", messages=messages)`                           |
| Mt0 XXL                            | `completion(model="watsonx/bigscience/mt0-xxl", messages=messages)`                        |
| Gpt Neox                           | `completion(model="watsonx/eleutherai/gpt-neox-20b", messages=messages)`                   |
| Mpt 7B Instruct2                   | `completion(model="watsonx/ibm/mpt-7b-instruct2", messages=messages)`                      |
| Starcoder                          | `completion(model="watsonx/bigcode/starcoder", messages=messages)`                         |
| Llama 2 70B Chat                   | `completion(model="watsonx/meta-llama/llama-2-70b-chat", messages=messages)`               |
| Llama 2 13B Chat                   | `completion(model="watsonx/meta-llama/llama-2-13b-chat", messages=messages)`               |
| Granite 13B Instruct               | `completion(model="watsonx/ibm/granite-13b-instruct-v1", messages=messages)`               |
| Granite 13B Chat                   | `completion(model="watsonx/ibm/granite-13b-chat-v1", messages=messages)`                   |
| Flan T5 XL                         | `completion(model="watsonx/google/flan-t5-xl", messages=messages)`                         |
| Granite 13B Chat V2                | `completion(model="watsonx/ibm/granite-13b-chat-v2", messages=messages)`                   |
| Granite 13B Instruct V2            | `completion(model="watsonx/ibm/granite-13b-instruct-v2", messages=messages)`               |
| Elyza Japanese Llama 2 7B Instruct | `completion(model="watsonx/elyza/elyza-japanese-llama-2-7b-instruct", messages=messages)`  |
| Mixtral 8X7B Instruct V01 Q        | `completion(model="watsonx/ibm-mistralai/mixtral-8x7b-instruct-v01-q", messages=messages)` |
For a list of all available models in watsonx.ai, see [here](https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/fm-models.html?context=wx&locale=en&audience=wdp).
@ -275,10 +275,10 @@ For a list of all available models in watsonx.ai, see [here](https://dataplatfor
## Supported IBM watsonx.ai Embedding Models
| Model Name | Function Call |
|----------------------|---------------------------------------------|
| Slate 30m | `embedding(model="watsonx/ibm/slate-30m-english-rtrvr", input=input)` |
| Slate 125m | `embedding(model="watsonx/ibm/slate-125m-english-rtrvr", input=input)` |
| Model Name | Function Call |
|------------|------------------------------------------------------------------------|
| Slate 30m | `embedding(model="watsonx/ibm/slate-30m-english-rtrvr", input=input)` |
| Slate 125m | `embedding(model="watsonx/ibm/slate-125m-english-rtrvr", input=input)` |
For a list of all available embedding models in watsonx.ai, see [here](https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/fm-models-embed.html?context=wx).

View file

@ -37,26 +37,26 @@ print(response)
## Supported Models
All models listed here https://inference.readthedocs.io/en/latest/models/builtin/embedding/index.html are supported (see the usage example after the table)
| Model Name | Function Call |
|------------------------------|--------------------------------------------------------|
| bge-base-en | `embedding(model="xinference/bge-base-en", input)` |
| bge-base-en-v1.5 | `embedding(model="xinference/bge-base-en-v1.5", input)` |
| bge-base-zh | `embedding(model="xinference/bge-base-zh", input)` |
| bge-base-zh-v1.5 | `embedding(model="xinference/bge-base-zh-v1.5", input)` |
| bge-large-en | `embedding(model="xinference/bge-large-en", input)` |
| bge-large-en-v1.5 | `embedding(model="xinference/bge-large-en-v1.5", input)` |
| bge-large-zh | `embedding(model="xinference/bge-large-zh", input)` |
| bge-large-zh-noinstruct | `embedding(model="xinference/bge-large-zh-noinstruct", input)` |
| bge-large-zh-v1.5 | `embedding(model="xinference/bge-large-zh-v1.5", input)` |
| bge-small-en-v1.5 | `embedding(model="xinference/bge-small-en-v1.5", input)` |
| bge-small-zh | `embedding(model="xinference/bge-small-zh", input)` |
| bge-small-zh-v1.5 | `embedding(model="xinference/bge-small-zh-v1.5", input)` |
| e5-large-v2 | `embedding(model="xinference/e5-large-v2", input)` |
| gte-base | `embedding(model="xinference/gte-base", input)` |
| gte-large | `embedding(model="xinference/gte-large", input)` |
| jina-embeddings-v2-base-en | `embedding(model="xinference/jina-embeddings-v2-base-en", input)` |
| jina-embeddings-v2-small-en | `embedding(model="xinference/jina-embeddings-v2-small-en", input)` |
| multilingual-e5-large | `embedding(model="xinference/multilingual-e5-large", input)` |
| Model Name | Function Call |
|-----------------------------|--------------------------------------------------------------------|
| bge-base-en | `embedding(model="xinference/bge-base-en", input)` |
| bge-base-en-v1.5 | `embedding(model="xinference/bge-base-en-v1.5", input)` |
| bge-base-zh | `embedding(model="xinference/bge-base-zh", input)` |
| bge-base-zh-v1.5 | `embedding(model="xinference/bge-base-zh-v1.5", input)` |
| bge-large-en | `embedding(model="xinference/bge-large-en", input)` |
| bge-large-en-v1.5 | `embedding(model="xinference/bge-large-en-v1.5", input)` |
| bge-large-zh | `embedding(model="xinference/bge-large-zh", input)` |
| bge-large-zh-noinstruct | `embedding(model="xinference/bge-large-zh-noinstruct", input)` |
| bge-large-zh-v1.5 | `embedding(model="xinference/bge-large-zh-v1.5", input)` |
| bge-small-en-v1.5 | `embedding(model="xinference/bge-small-en-v1.5", input)` |
| bge-small-zh | `embedding(model="xinference/bge-small-zh", input)` |
| bge-small-zh-v1.5 | `embedding(model="xinference/bge-small-zh-v1.5", input)` |
| e5-large-v2 | `embedding(model="xinference/e5-large-v2", input)` |
| gte-base | `embedding(model="xinference/gte-base", input)` |
| gte-large | `embedding(model="xinference/gte-large", input)` |
| jina-embeddings-v2-base-en | `embedding(model="xinference/jina-embeddings-v2-base-en", input)` |
| jina-embeddings-v2-small-en | `embedding(model="xinference/jina-embeddings-v2-small-en", input)` |
| multilingual-e5-large | `embedding(model="xinference/multilingual-e5-large", input)` |
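For example, a minimal embedding call against a locally running Xinference server; the `api_base` value is an assumption here and should point at your own deployment:
```python
from litellm import embedding

response = embedding(
    model="xinference/bge-base-en",
    api_base="http://127.0.0.1:9997/v1",  # assumed local Xinference endpoint
    input=["good morning from litellm"],
)
print(response)
```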

View file

@ -260,7 +260,7 @@ Requirements:
<TabItem value="docker-deploy" label="Dockerfile">
We maintain a [seperate Dockerfile](https://github.com/BerriAI/litellm/pkgs/container/litellm-database) for reducing build time when running LiteLLM proxy with a connected Postgres Database
We maintain a [separate Dockerfile](https://github.com/BerriAI/litellm/pkgs/container/litellm-database) for reducing build time when running LiteLLM proxy with a connected Postgres Database
```shell
docker pull ghcr.io/berriai/litellm-database:main-latest

View file

@ -2,30 +2,213 @@ import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# ✨ Enterprise Features - Content Mod, SSO, Custom Swagger
# ✨ Enterprise Features - SSO, Audit Logs, Guardrails
Features here are behind a commercial license in our `/enterprise` folder. [**See Code**](https://github.com/BerriAI/litellm/tree/main/enterprise)
:::tip
:::info
[Get Started with Enterprise here](https://github.com/BerriAI/litellm/tree/main/enterprise)
Get in touch with us [here](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
:::
Features:
- ✅ [SSO for Admin UI](./ui.md#✨-enterprise-features)
- ✅ Content Moderation with LLM Guard, LlamaGuard, Google Text Moderations
- ✅ [Prompt Injection Detection (with LakeraAI API)](#prompt-injection-detection-lakeraai)
- ✅ [Audit Logs](#audit-logs)
- ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags)
- ✅ [Content Moderation with LLM Guard, LlamaGuard, Google Text Moderations](#content-moderation)
- ✅ [Prompt Injection Detection (with LakeraAI API)](#prompt-injection-detection---lakeraai)
- ✅ [Custom Branding + Routes on Swagger Docs](#swagger-docs---custom-routes--branding)
- ✅ Reject calls from Blocked User list
- ✅ Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors)
- ✅ Don't log/store specific requests to Langfuse, Sentry, etc. (eg confidential LLM requests)
- ✅ Tracking Spend for Custom Tags
- ✅ Custom Branding + Routes on Swagger Docs
- ✅ Audit Logs for `Created At, Created By` when Models Added
## Audit Logs
Store audit logs for **Create, Update, Delete Operations** done on `Teams` and `Virtual Keys`
**Step 1** Switch on audit logs
```yaml
litellm_settings:
  store_audit_logs: true
```
Start the litellm proxy with this config
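For example, assuming the settings above are saved to `config.yaml`:
```shell
litellm --config /path/to/config.yaml
```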
**Step 2** Test it - Create a Team
```shell
curl --location 'http://0.0.0.0:4000/team/new' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"max_budget": 2
}'
```
**Step 3** Expected Log
```json
{
  "id": "e1760e10-4264-4499-82cd-c08c86c8d05b",
  "updated_at": "2024-06-06T02:10:40.836420+00:00",
  "changed_by": "109010464461339474872",
  "action": "created",
  "table_name": "LiteLLM_TeamTable",
  "object_id": "82e725b5-053f-459d-9a52-867191635446",
  "before_value": null,
  "updated_values": {
    "team_id": "82e725b5-053f-459d-9a52-867191635446",
    "admins": [],
    "members": [],
    "members_with_roles": [
      {
        "role": "admin",
        "user_id": "109010464461339474872"
      }
    ],
    "max_budget": 2.0,
    "models": [],
    "blocked": false
  }
}
```
## Tracking Spend for Custom Tags
Requirements:
- Virtual Keys & a database should be set up; see [virtual keys](https://docs.litellm.ai/docs/proxy/virtual_keys)
#### Usage - /chat/completions requests with request tags
<Tabs>
<TabItem value="openai" label="OpenAI Python v1.0.0+">
Set `extra_body={"metadata": { }}` to the `metadata` you want to pass
```python
import openai

client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={
        "metadata": {
            "tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"]
        }
    }
)

print(response)
```
</TabItem>
<TabItem value="Curl" label="Curl Request">
Pass `metadata` as part of the request body
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
    "model": "gpt-3.5-turbo",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ],
    "metadata": {"tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"]}
}'
```
</TabItem>
<TabItem value="langchain" label="Langchain">
```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage

chat = ChatOpenAI(
    openai_api_base="http://0.0.0.0:4000",
    model="gpt-3.5-turbo",
    temperature=0.1,
    extra_body={
        "metadata": {
            "tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"]
        }
    }
)

messages = [
    SystemMessage(
        content="You are a helpful assistant that I'm using to make a test request to."
    ),
    HumanMessage(
        content="test from litellm. tell me why it's amazing in 1 sentence"
    ),
]
response = chat(messages)
print(response)
```
</TabItem>
</Tabs>
#### Viewing Spend per tag
#### `/spend/tags` Request Format
```shell
curl -X GET "http://0.0.0.0:4000/spend/tags" \
-H "Authorization: Bearer sk-1234"
```
#### `/spend/tags` Response Format
```json
[
  {
    "individual_request_tag": "model-anthropic-claude-v2.1",
    "log_count": 6,
    "total_spend": 0.000672
  },
  {
    "individual_request_tag": "app-ishaan-local",
    "log_count": 4,
    "total_spend": 0.000448
  },
  {
    "individual_request_tag": "app-ishaan-prod",
    "log_count": 2,
    "total_spend": 0.000224
  }
]
```
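To inspect this programmatically, you can call the endpoint from a script and aggregate the results. A minimal sketch using `requests` (the proxy URL and key match the examples above):
```python
import requests

resp = requests.get(
    "http://0.0.0.0:4000/spend/tags",
    headers={"Authorization": "Bearer sk-1234"},
)
resp.raise_for_status()

# print a per-tag breakdown, then the total spend across all tags
tags = resp.json()
for tag in tags:
    print(f'{tag["individual_request_tag"]}: ${tag["total_spend"]:.6f} ({tag["log_count"]} logs)')
print("total:", sum(t["total_spend"] for t in tags))
```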
## Content Moderation
### Content Moderation with LLM Guard
#### Content Moderation with LLM Guard
Set the LLM Guard API Base in your environment
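For example, if LLM Guard is running locally (the `LLM_GUARD_API_BASE` variable name and the port are assumptions; adjust to your deployment):
```shell
export LLM_GUARD_API_BASE="http://0.0.0.0:8192"
```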
@ -160,7 +343,7 @@ curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
</TabItem>
</Tabs>
### Content Moderation with LlamaGuard
#### Content Moderation with LlamaGuard
Currently works with Sagemaker's LlamaGuard endpoint.
@ -194,7 +377,7 @@ callbacks: ["llamaguard_moderations"]
### Content Moderation with Google Text Moderation
#### Content Moderation with Google Text Moderation
Requires your GOOGLE_APPLICATION_CREDENTIALS to be set in your .env (same as VertexAI).
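For example, pointing it at a service-account JSON file (the path is a placeholder):
```shell
export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account.json"
```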
@ -250,7 +433,7 @@ Here are the category specific values:
### Content Moderation with OpenAI Moderations
#### Content Moderation with OpenAI Moderations
Use this if you want to reject /chat, /completions, /embeddings calls that fail OpenAI Moderations checks
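A sketch of the config, assuming the callback name follows the same pattern as the other moderation hooks above (`openai_moderations` is an assumption):
```yaml
litellm_settings:
  callbacks: ["openai_moderations"] # assumed name, mirroring "llamaguard_moderations" above
```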
@ -276,7 +459,7 @@ Step 1 Set a `LAKERA_API_KEY` in your env
LAKERA_API_KEY="7a91a1a6059da*******"
```
Step 2. Add `lakera_prompt_injection` to your calbacks
Step 2. Add `lakera_prompt_injection` to your callbacks
```yaml
litellm_settings:
@ -302,6 +485,42 @@ curl --location 'http://localhost:4000/chat/completions' \
}'
```
## Swagger Docs - Custom Routes + Branding
:::info
Requires a LiteLLM Enterprise key to use. Get a free 2-week license [here](https://forms.gle/sTDVprBs18M4V8Le8)
:::
Set LiteLLM Key in your environment
```bash
LITELLM_LICENSE=""
```
#### Customize Title + Description
In your environment, set:
```bash
DOCS_TITLE="TotalGPT"
DOCS_DESCRIPTION="Sample Company Description"
```
#### Customize Routes
Hide admin routes from users.
In your environment, set:
```bash
DOCS_FILTERED="True" # only shows openai routes to user
```
<Image img={require('../../img/custom_swagger.png')} style={{ width: '900px', height: 'auto' }} />
## Enable Blocked User Lists
If any call is made to the proxy with this user id, it'll be rejected - use this if you want to let users opt out of AI features
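A sketch of what enabling this could look like; the `blocked_user_check` callback and `blocked_user_list` setting names are assumptions based on the feature description:
```yaml
litellm_settings:
  callbacks: ["blocked_user_check"] # assumed callback name
  blocked_user_list: ["user_id_1", "user_id_2"] # calls with these user ids are rejected
```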
@ -417,176 +636,6 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
}
'
```
## Tracking Spend for Custom Tags
Requirements:
- Virtual Keys & a database should be set up; see [virtual keys](https://docs.litellm.ai/docs/proxy/virtual_keys)
### Usage - /chat/completions requests with request tags
<Tabs>
<TabItem value="openai" label="OpenAI Python v1.0.0+">
Set `extra_body={"metadata": { }}` to the `metadata` you want to pass
```python
import openai

client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={
        "metadata": {
            "tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"]
        }
    }
)

print(response)
```
</TabItem>
<TabItem value="Curl" label="Curl Request">
Pass `metadata` as part of the request body
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
    "model": "gpt-3.5-turbo",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ],
    "metadata": {"tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"]}
}'
```
</TabItem>
<TabItem value="langchain" label="Langchain">
```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage

chat = ChatOpenAI(
    openai_api_base="http://0.0.0.0:4000",
    model="gpt-3.5-turbo",
    temperature=0.1,
    extra_body={
        "metadata": {
            "tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"]
        }
    }
)

messages = [
    SystemMessage(
        content="You are a helpful assistant that I'm using to make a test request to."
    ),
    HumanMessage(
        content="test from litellm. tell me why it's amazing in 1 sentence"
    ),
]
response = chat(messages)
print(response)
```
</TabItem>
</Tabs>
### Viewing Spend per tag
#### `/spend/tags` Request Format
```shell
curl -X GET "http://0.0.0.0:4000/spend/tags" \
-H "Authorization: Bearer sk-1234"
```
#### `/spend/tags` Response Format
```json
[
  {
    "individual_request_tag": "model-anthropic-claude-v2.1",
    "log_count": 6,
    "total_spend": 0.000672
  },
  {
    "individual_request_tag": "app-ishaan-local",
    "log_count": 4,
    "total_spend": 0.000448
  },
  {
    "individual_request_tag": "app-ishaan-prod",
    "log_count": 2,
    "total_spend": 0.000224
  }
]
```
<!-- ## Tracking Spend per Key
## Tracking Spend per User -->
## Swagger Docs - Custom Routes + Branding
:::info
Requires a LiteLLM Enterprise key to use. Get a free 2-week license [here](https://forms.gle/sTDVprBs18M4V8Le8)
:::
Set LiteLLM Key in your environment
```bash
LITELLM_LICENSE=""
```
### Customize Title + Description
In your environment, set:
```bash
DOCS_TITLE="TotalGPT"
DOCS_DESCRIPTION="Sample Company Description"
```
### Customize Routes
Hide admin routes from users.
In your environment, set:
```bash
DOCS_FILTERED="True" # only shows openai routes to user
```
<Image img={require('../../img/custom_swagger.png')} style={{ width: '900px', height: 'auto' }} />
## Public Model Hub

View file

@ -41,7 +41,9 @@ litellm_settings:
**Step 3**: Set required env variables for logging to langfuse
```shell
export LANGFUSE_PUBLIC_KEY="pk_kk"
export LANGFUSE_SECRET_KEY="sk_ss
export LANGFUSE_SECRET_KEY="sk_ss"
# Optional, defaults to https://cloud.langfuse.com
export LANGFUSE_HOST="https://xxx.langfuse.com"
```
**Step 4**: Start the proxy, make a test request

View file

@ -100,4 +100,76 @@ print(response)
```
</TabItem>
</Tabs>
</Tabs>
## Advanced - Redis Caching
Use Redis caching to do request prioritization across multiple instances of LiteLLM.
### SDK
```python
import asyncio
import os

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "mock_response": "Hello world this is Macintosh!",  # fakes the LLM API call
                "rpm": 1,
            },
        },
    ],
    ### REDIS PARAMS ###
    redis_host=os.environ["REDIS_HOST"],
    redis_password=os.environ["REDIS_PASSWORD"],
    redis_port=os.environ["REDIS_PORT"],
)

async def main():
    try:
        _response = await router.schedule_acompletion(  # 👈 ADDS TO QUEUE + POLLS + MAKES CALL
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hey!"}],
            priority=0,  # 👈 LOWER IS BETTER
        )
    except Exception as e:
        print("didn't make request")

asyncio.run(main())
```
### PROXY
```yaml
model_list:
  - model_name: gpt-3.5-turbo-fake-model
    litellm_params:
      model: gpt-3.5-turbo
      mock_response: "hello world!"
      api_key: my-good-key

router_settings:
  redis_host: os.environ/REDIS_HOST
  redis_password: os.environ/REDIS_PASSWORD
  redis_port: os.environ/REDIS_PORT
```
```bash
$ litellm --config /path/to/config.yaml
# RUNNING on http://0.0.0.0:4000
```
```bash
curl -X POST 'http://localhost:4000/queue/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
    "model": "gpt-3.5-turbo-fake-model",
    "messages": [
        {
            "role": "user",
            "content": "what is the meaning of the universe? 1234"
        }
    ],
    "priority": 0 👈 SET VALUE HERE
}'
```

View file

@ -1,11 +1,31 @@
# Secret Manager
LiteLLM supports reading secrets from Azure Key Vault and Infisical
- AWS Key Management Service
- AWS Secret Manager
- [Azure Key Vault](#azure-key-vault)
- Google Key Management Service
- [Infisical Secret Manager](#infisical-secret-manager)
- [.env Files](#env-files)
## AWS Key Management Service
Use AWS KMS to store an encrypted copy of your Proxy Master Key in the environment.
```bash
export LITELLM_MASTER_KEY="djZ9xjVaZ..." # 👈 ENCRYPTED KEY
export AWS_REGION_NAME="us-west-2"
```
```yaml
general_settings:
  key_management_system: "aws_kms"
  key_management_settings:
    hosted_keys: ["LITELLM_MASTER_KEY"] # 👈 WHICH KEYS ARE STORED ON KMS
```
[**See Decryption Code**](https://github.com/BerriAI/litellm/blob/a2da2a8f168d45648b61279d4795d647d94f90c9/litellm/utils.py#L10182)
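To produce the encrypted value in the first place, you can encrypt your plaintext master key with the AWS CLI. A sketch (the KMS key alias is hypothetical):
```shell
echo -n "sk-1234" > /tmp/master_key
aws kms encrypt \
  --key-id alias/litellm-master-key \
  --plaintext fileb:///tmp/master_key \
  --output text \
  --query CiphertextBlob
```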
## AWS Secret Manager
Store your proxy keys in AWS Secret Manager.
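A sketch of the config, assuming it follows the same `key_management_system` pattern shown above for KMS:
```yaml
general_settings:
  key_management_system: "aws_secret_manager" # assumed value, mirroring "aws_kms" above
```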

View file

@ -1,8 +1,8 @@
# Using Fine-Tuned gpt-3.5-turbo
LiteLLM allows you to call `completion` with your fine-tuned gpt-3.5-turbo models
If you're trying to create your custom finetuned gpt-3.5-turbo model following along on this tutorial: https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset
If you're trying to create your custom fine-tuned gpt-3.5-turbo model following along on this tutorial: https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset
Once you've created your fine tuned model, you can call it with `litellm.completion()`
Once you've created your fine-tuned model, you can call it with `litellm.completion()`
## Usage
```python

View file

@ -5975,9 +5975,9 @@
}
},
"node_modules/caniuse-lite": {
"version": "1.0.30001519",
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001519.tgz",
"integrity": "sha512-0QHgqR+Jv4bxHMp8kZ1Kn8CH55OikjKJ6JmKkZYP1F3D7w+lnFXF70nG5eNfsZS89jadi5Ywy5UCSKLAglIRkg==",
"version": "1.0.30001629",
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001629.tgz",
"integrity": "sha512-c3dl911slnQhmxUIT4HhYzT7wnBK/XYpGnYLOj4nJBaRiw52Ibe7YxlDaAeRECvA786zCuExhxIUJ2K7nHMrBw==",
"funding": [
{
"type": "opencollective",

View file

@ -36,6 +36,7 @@ const sidebars = {
label: "📖 All Endpoints (Swagger)",
href: "https://litellm-api.up.railway.app/",
},
"proxy/enterprise",
"proxy/demo",
"proxy/configs",
"proxy/reliability",
@ -45,7 +46,6 @@ const sidebars = {
"proxy/customers",
"proxy/billing",
"proxy/user_keys",
"proxy/enterprise",
"proxy/virtual_keys",
"proxy/alerting",
{

File diff suppressed because it is too large

View file

@ -18,10 +18,6 @@ async def log_event(request: Request):
return {"message": "Request received successfully"}
except Exception as e:
print(f"Error processing request: {str(e)}")
import traceback
traceback.print_exc()
raise HTTPException(status_code=500, detail="Internal Server Error")

View file

@ -120,6 +120,5 @@ class GenericAPILogger:
)
return response
except Exception as e:
traceback.print_exc()
verbose_logger.debug(f"Generic - {str(e)}\n{traceback.format_exc()}")
verbose_logger.error(f"Generic - {str(e)}\n{traceback.format_exc()}")
pass

View file

@ -82,7 +82,7 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
except HTTPException as e:
raise e
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(traceback.format_exc())
async def async_post_call_success_hook(
self,

View file

@ -118,4 +118,4 @@ class _ENTERPRISE_BlockedUserList(CustomLogger):
except HTTPException as e:
raise e
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(traceback.format_exc())

View file

@ -92,7 +92,7 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
},
)
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(traceback.format_exc())
raise e
def should_proceed(self, user_api_key_dict: UserAPIKeyAuth, data: dict) -> bool:

View file

@ -60,6 +60,7 @@ _async_failure_callback: List[Callable] = (
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
turn_off_message_logging: Optional[bool] = False
store_audit_logs = False # Enterprise feature, allow users to see audit logs
## end of callbacks #############
email: Optional[str] = (
@ -808,6 +809,7 @@ from .exceptions import (
APIConnectionError,
APIResponseValidationError,
UnprocessableEntityError,
InternalServerError,
LITELLM_EXCEPTION_TYPES,
)
from .budget_manager import BudgetManager

View file

@ -1,5 +1,6 @@
import logging, os, json
from logging import Formatter
import traceback
set_verbose = False
json_logs = bool(os.getenv("JSON_LOGS", False))

View file

@ -253,7 +253,6 @@ class RedisCache(BaseCache):
str(e),
value,
)
traceback.print_exc()
raise e
async def async_scan_iter(self, pattern: str, count: int = 100) -> list:
@ -313,7 +312,6 @@ class RedisCache(BaseCache):
str(e),
value,
)
traceback.print_exc()
key = self.check_and_fix_namespace(key=key)
async with _redis_client as redis_client:
@ -352,7 +350,6 @@ class RedisCache(BaseCache):
str(e),
value,
)
traceback.print_exc()
async def async_set_cache_pipeline(self, cache_list, ttl=None):
"""
@ -413,7 +410,6 @@ class RedisCache(BaseCache):
str(e),
cache_value,
)
traceback.print_exc()
async def batch_cache_write(self, key, value, **kwargs):
print_verbose(
@ -458,7 +454,6 @@ class RedisCache(BaseCache):
str(e),
value,
)
traceback.print_exc()
raise e
async def flush_cache_buffer(self):
@ -495,8 +490,9 @@ class RedisCache(BaseCache):
return self._get_cache_logic(cached_response=cached_response)
except Exception as e:
# NON blocking - notify users Redis is throwing an exception
traceback.print_exc()
logging.debug("LiteLLM Caching: get() - Got exception from REDIS: ", e)
verbose_logger.error(
"LiteLLM Caching: get() - Got exception from REDIS: ", e
)
def batch_get_cache(self, key_list) -> dict:
"""
@ -646,10 +642,9 @@ class RedisCache(BaseCache):
error=e,
call_type="sync_ping",
)
print_verbose(
verbose_logger.error(
f"LiteLLM Redis Cache PING: - Got exception from REDIS : {str(e)}"
)
traceback.print_exc()
raise e
async def ping(self) -> bool:
@ -683,10 +678,9 @@ class RedisCache(BaseCache):
call_type="async_ping",
)
)
print_verbose(
verbose_logger.error(
f"LiteLLM Redis Cache PING: - Got exception from REDIS : {str(e)}"
)
traceback.print_exc()
raise e
async def delete_cache_keys(self, keys):
@ -1138,22 +1132,23 @@ class S3Cache(BaseCache):
cached_response = ast.literal_eval(cached_response)
if type(cached_response) is not dict:
cached_response = dict(cached_response)
print_verbose(
verbose_logger.debug(
f"Got S3 Cache: key: {key}, cached_response {cached_response}. Type Response {type(cached_response)}"
)
return cached_response
except botocore.exceptions.ClientError as e:
if e.response["Error"]["Code"] == "NoSuchKey":
print_verbose(
verbose_logger.error(
f"S3 Cache: The specified key '{key}' does not exist in the S3 bucket."
)
return None
except Exception as e:
# NON blocking - notify users S3 is throwing an exception
traceback.print_exc()
print_verbose(f"S3 Caching: get_cache() - Got exception from S3: {e}")
verbose_logger.error(
f"S3 Caching: get_cache() - Got exception from S3: {e}"
)
async def async_get_cache(self, key, **kwargs):
return self.get_cache(key=key, **kwargs)
@ -1234,8 +1229,7 @@ class DualCache(BaseCache):
return result
except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
traceback.print_exc()
verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
raise e
def get_cache(self, key, local_only: bool = False, **kwargs):
@ -1262,7 +1256,7 @@ class DualCache(BaseCache):
print_verbose(f"get cache: cache result: {result}")
return result
except Exception as e:
traceback.print_exc()
verbose_logger.error(traceback.format_exc())
def batch_get_cache(self, keys: list, local_only: bool = False, **kwargs):
try:
@ -1295,7 +1289,7 @@ class DualCache(BaseCache):
print_verbose(f"async batch get cache: cache result: {result}")
return result
except Exception as e:
traceback.print_exc()
verbose_logger.error(traceback.format_exc())
async def async_get_cache(self, key, local_only: bool = False, **kwargs):
# Try to fetch from in-memory cache first
@ -1328,7 +1322,7 @@ class DualCache(BaseCache):
print_verbose(f"get cache: cache result: {result}")
return result
except Exception as e:
traceback.print_exc()
verbose_logger.error(traceback.format_exc())
async def async_batch_get_cache(
self, keys: list, local_only: bool = False, **kwargs
@ -1368,7 +1362,7 @@ class DualCache(BaseCache):
return result
except Exception as e:
traceback.print_exc()
verbose_logger.error(traceback.format_exc())
async def async_set_cache(self, key, value, local_only: bool = False, **kwargs):
print_verbose(
@ -1381,8 +1375,8 @@ class DualCache(BaseCache):
if self.redis_cache is not None and local_only == False:
await self.redis_cache.async_set_cache(key, value, **kwargs)
except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
traceback.print_exc()
verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
verbose_logger.debug(traceback.format_exc())
async def async_batch_set_cache(
self, cache_list: list, local_only: bool = False, **kwargs
@ -1404,8 +1398,8 @@ class DualCache(BaseCache):
cache_list=cache_list, ttl=kwargs.get("ttl", None)
)
except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
traceback.print_exc()
verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
verbose_logger.debug(traceback.format_exc())
async def async_increment_cache(
self, key, value: float, local_only: bool = False, **kwargs
@ -1429,8 +1423,8 @@ class DualCache(BaseCache):
return result
except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
traceback.print_exc()
verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
verbose_logger.debug(traceback.format_exc())
raise e
def flush_cache(self):
@ -1846,8 +1840,8 @@ class Cache:
)
self.cache.set_cache(cache_key, cached_data, **kwargs)
except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
traceback.print_exc()
verbose_logger.error(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
verbose_logger.debug(traceback.format_exc())
pass
async def async_add_cache(self, result, *args, **kwargs):
@ -1864,8 +1858,8 @@ class Cache:
)
await self.cache.async_set_cache(cache_key, cached_data, **kwargs)
except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
traceback.print_exc()
verbose_logger.error(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
verbose_logger.debug(traceback.format_exc())
async def async_add_cache_pipeline(self, result, *args, **kwargs):
"""
@ -1897,8 +1891,8 @@ class Cache:
)
await asyncio.gather(*tasks)
except Exception as e:
print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
traceback.print_exc()
verbose_logger.error(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
verbose_logger.debug(traceback.format_exc())
async def batch_cache_write(self, result, *args, **kwargs):
cache_key, cached_data, kwargs = self._add_cache_logic(

View file

@ -638,6 +638,7 @@ LITELLM_EXCEPTION_TYPES = [
APIConnectionError,
APIResponseValidationError,
OpenAIError,
InternalServerError,
]

View file

@ -169,6 +169,5 @@ class AISpendLogger:
print_verbose(f"AISpend Logging - final data object: {data}")
except:
# traceback.print_exc()
print_verbose(f"AISpend Logging Error - {traceback.format_exc()}")
pass

View file

@ -178,6 +178,5 @@ class BerriSpendLogger:
print_verbose(f"BerriSpend Logging - final data object: {data}")
response = requests.post(url, headers=headers, json=data)
except:
# traceback.print_exc()
print_verbose(f"BerriSpend Logging Error - {traceback.format_exc()}")
pass

View file

@ -297,6 +297,5 @@ class ClickhouseLogger:
# make request to endpoint with payload
verbose_logger.debug(f"Clickhouse Logger - final response = {response}")
except Exception as e:
traceback.print_exc()
verbose_logger.debug(f"Clickhouse - {str(e)}\n{traceback.format_exc()}")
pass

View file

@ -115,7 +115,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
)
print_verbose(f"Custom Logger - model call details: {kwargs}")
except:
traceback.print_exc()
print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
async def async_log_input_event(
@ -130,7 +129,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
)
print_verbose(f"Custom Logger - model call details: {kwargs}")
except:
traceback.print_exc()
print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
def log_event(
@ -146,7 +144,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
end_time,
)
except:
# traceback.print_exc()
print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
pass
@ -163,6 +160,5 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
end_time,
)
except:
# traceback.print_exc()
print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
pass

View file

@ -134,7 +134,6 @@ class DataDogLogger:
f"Datadog Layer Logging - final response object: {response_obj}"
)
except Exception as e:
traceback.print_exc()
verbose_logger.debug(
f"Datadog Layer Error - {str(e)}\n{traceback.format_exc()}"
)

View file

@ -85,6 +85,5 @@ class DyanmoDBLogger:
)
return response
except:
traceback.print_exc()
print_verbose(f"DynamoDB Layer Error - {traceback.format_exc()}")
pass

View file

@ -112,6 +112,5 @@ class HeliconeLogger:
)
print_verbose(f"Helicone Logging - Error {response.text}")
except:
# traceback.print_exc()
print_verbose(f"Helicone Logging Error - {traceback.format_exc()}")
pass

View file

@ -69,6 +69,43 @@ class LangFuseLogger:
else:
self.upstream_langfuse = None
@staticmethod
def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict:
"""
Adds metadata from proxy request headers to Langfuse logging if keys start with "langfuse_"
and overwrites litellm_params.metadata if already included.
For example if you want to append your trace to an existing `trace_id` via header, send
`headers: { ..., langfuse_existing_trace_id: your-existing-trace-id }` via proxy request.
"""
if litellm_params is None:
return metadata
if litellm_params.get("proxy_server_request") is None:
return metadata
if metadata is None:
metadata = {}
proxy_headers = (
litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
)
for metadata_param_key in proxy_headers:
if metadata_param_key.startswith("langfuse_"):
trace_param_key = metadata_param_key.replace("langfuse_", "", 1)
if trace_param_key in metadata:
verbose_logger.warning(
f"Overwriting Langfuse `{trace_param_key}` from request header"
)
else:
verbose_logger.debug(
f"Found Langfuse `{trace_param_key}` in request header"
)
metadata[trace_param_key] = proxy_headers.get(metadata_param_key)
return metadata
# def log_error(kwargs, response_obj, start_time, end_time):
# generation = trace.generation(
# level ="ERROR" # can be any of DEBUG, DEFAULT, WARNING or ERROR
@ -97,6 +134,7 @@ class LangFuseLogger:
metadata = (
litellm_params.get("metadata", {}) or {}
) # if litellm_params['metadata'] == None
metadata = self.add_metadata_from_header(litellm_params, metadata)
optional_params = copy.deepcopy(kwargs.get("optional_params", {}))
prompt = {"messages": kwargs.get("messages")}
@@ -182,9 +220,11 @@ class LangFuseLogger:
verbose_logger.info(f"Langfuse Layer Logging - logging success")
return {"trace_id": trace_id, "generation_id": generation_id}
except:
traceback.print_exc()
verbose_logger.debug(f"Langfuse Layer Error - {traceback.format_exc()}")
except Exception as e:
verbose_logger.error(
"Langfuse Layer Error(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
return {"trace_id": None, "generation_id": None}
async def _async_log_event(

View file

@@ -44,7 +44,9 @@ class LangsmithLogger:
print_verbose(
f"Langsmith Logging - project_name: {project_name}, run_name {run_name}"
)
langsmith_base_url = os.getenv("LANGSMITH_BASE_URL", "https://api.smith.langchain.com")
langsmith_base_url = os.getenv(
"LANGSMITH_BASE_URL", "https://api.smith.langchain.com"
)
try:
print_verbose(
@@ -89,9 +91,7 @@ class LangsmithLogger:
}
url = f"{langsmith_base_url}/runs"
print_verbose(
f"Langsmith Logging - About to send data to {url} ..."
)
print_verbose(f"Langsmith Logging - About to send data to {url} ...")
response = requests.post(
url=url,
json=data,
@@ -106,6 +106,5 @@ class LangsmithLogger:
f"Langsmith Layer Logging - final response object: {response_obj}"
)
except:
# traceback.print_exc()
print_verbose(f"Langsmith Layer Error - {traceback.format_exc()}")
pass

View file

@@ -171,7 +171,6 @@ class LogfireLogger:
f"Logfire Layer Logging - final response object: {response_obj}"
)
except Exception as e:
traceback.print_exc()
verbose_logger.debug(
f"Logfire Layer Error - {str(e)}\n{traceback.format_exc()}"
)

View file

@@ -14,6 +14,7 @@ def parse_usage(usage):
"prompt": usage["prompt_tokens"] if "prompt_tokens" in usage else 0,
}
def parse_tool_calls(tool_calls):
if tool_calls is None:
return None
@@ -26,13 +27,13 @@ def parse_tool_calls(tool_calls):
"function": {
"name": tool_call.function.name,
"arguments": tool_call.function.arguments,
}
},
}
return serialized
return [clean_tool_call(tool_call) for tool_call in tool_calls]
def parse_messages(input):
@@ -176,6 +177,5 @@ class LunaryLogger:
)
except:
# traceback.print_exc()
print_verbose(f"Lunary Logging Error - {traceback.format_exc()}")
pass

View file

@@ -109,8 +109,8 @@ class PrometheusLogger:
end_user_id, user_api_key, model, user_api_team, user_id
).inc()
except Exception as e:
traceback.print_exc()
verbose_logger.debug(
f"prometheus Layer Error - {str(e)}\n{traceback.format_exc()}"
verbose_logger.error(
"prometheus Layer Error(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
pass

View file

@@ -180,6 +180,5 @@ class S3Logger:
print_verbose(f"s3 Layer Logging - final response object: {response_obj}")
return response
except Exception as e:
traceback.print_exc()
verbose_logger.debug(f"s3 Layer Error - {str(e)}\n{traceback.format_exc()}")
pass

View file

@@ -110,6 +110,5 @@ class Supabase:
)
except:
# traceback.print_exc()
print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")
pass

View file

@@ -217,6 +217,5 @@ class WeightsBiasesLogger:
f"W&B Logging Logging - final response object: {response_obj}"
)
except:
# traceback.print_exc()
print_verbose(f"W&B Logging Layer Error - {traceback.format_exc()}")
pass

View file

@@ -1,13 +1,14 @@
import os, types, traceback, copy, asyncio
import json
from enum import Enum
import types
import traceback
import copy
import time
from typing import Callable, Optional
from litellm.utils import ModelResponse, get_secret, Choices, Message, Usage
from litellm.utils import ModelResponse, Choices, Message, Usage
import litellm
import sys, httpx
import httpx
from .prompt_templates.factory import prompt_factory, custom_prompt, get_system_prompt
from packaging.version import Version
from litellm import verbose_logger
class GeminiError(Exception):
@@ -264,7 +265,8 @@ def completion(
choices_list.append(choice_obj)
model_response["choices"] = choices_list
except Exception as e:
traceback.print_exc()
verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e)))
verbose_logger.debug(traceback.format_exc())
raise GeminiError(
message=traceback.format_exc(), status_code=response.status_code
)
@@ -356,7 +358,8 @@ async def async_completion(
choices_list.append(choice_obj)
model_response["choices"] = choices_list
except Exception as e:
traceback.print_exc()
verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e)))
verbose_logger.debug(traceback.format_exc())
raise GeminiError(
message=traceback.format_exc(), status_code=response.status_code
)

View file

@@ -2,10 +2,12 @@ from itertools import chain
import requests, types, time # type: ignore
import json, uuid
import traceback
from typing import Optional
from typing import Optional, List
import litellm
from litellm.types.utils import ProviderField
import httpx, aiohttp, asyncio # type: ignore
from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm import verbose_logger
class OllamaError(Exception):
@@ -124,6 +126,19 @@ class OllamaConfig:
)
and v is not None
}
def get_required_params(self) -> List[ProviderField]:
"""For a given provider, return it's required fields with a description"""
return [
ProviderField(
field_name="base_url",
field_type="string",
field_description="Your Ollama API Base",
field_value="http://10.10.11.249:11434",
)
]
def get_supported_openai_params(
self,
):
@@ -138,10 +153,12 @@ class OllamaConfig:
"response_format",
]
# ollama wants plain base64 jpeg/png files as images. strip any leading dataURI
# and convert to jpeg if necessary.
def _convert_image(image):
import base64, io
try:
from PIL import Image
except:
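The hunk above is truncated mid-function, but the comment it adds describes the intent. A rough sketch of the dataURI-stripping step it refers to, assuming the usual `data:<mime>;base64,<payload>` shape (the PIL-based jpeg conversion is omitted here):

def strip_data_uri(image: str) -> str:
    # "data:image/png;base64,iVBORw0..." -> "iVBORw0..."
    if image.startswith("data:") and "," in image:
        return image.split(",", 1)[1]
    return image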
@@ -391,7 +408,13 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob
async for transformed_chunk in streamwrapper:
yield transformed_chunk
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"LiteLLM.ollama.py::ollama_async_streaming(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
raise e
@@ -455,7 +478,12 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
)
return model_response
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"LiteLLM.ollama.py::ollama_acompletion(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
raise e

View file

@@ -1,11 +1,15 @@
from itertools import chain
import requests, types, time
import json, uuid
import requests
import types
import time
import json
import uuid
import traceback
from typing import Optional
from litellm import verbose_logger
import litellm
import httpx, aiohttp, asyncio
from .prompt_templates.factory import prompt_factory, custom_prompt
import httpx
import aiohttp
class OllamaError(Exception):
@@ -299,7 +303,10 @@ def get_ollama_response(
tool_calls=[
{
"id": f"call_{str(uuid.uuid4())}",
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
"function": {
"name": function_call["name"],
"arguments": json.dumps(function_call["arguments"]),
},
"type": "function",
}
],
@@ -307,7 +314,9 @@ def get_ollama_response(
model_response["choices"][0]["message"] = message
model_response["choices"][0]["finish_reason"] = "tool_calls"
else:
model_response["choices"][0]["message"]["content"] = response_json["message"]["content"]
model_response["choices"][0]["message"]["content"] = response_json["message"][
"content"
]
model_response["created"] = int(time.time())
model_response["model"] = "ollama/" + model
prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=messages)) # type: ignore
@@ -361,7 +370,10 @@ def ollama_completion_stream(url, api_key, data, logging_obj):
tool_calls=[
{
"id": f"call_{str(uuid.uuid4())}",
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
"function": {
"name": function_call["name"],
"arguments": json.dumps(function_call["arguments"]),
},
"type": "function",
}
],
@@ -410,9 +422,10 @@ async def ollama_async_streaming(
first_chunk_content = first_chunk.choices[0].delta.content or ""
response_content = first_chunk_content + "".join(
[
chunk.choices[0].delta.content
async for chunk in streamwrapper
if chunk.choices[0].delta.content]
chunk.choices[0].delta.content
async for chunk in streamwrapper
if chunk.choices[0].delta.content
]
)
function_call = json.loads(response_content)
delta = litellm.utils.Delta(
@@ -420,7 +433,10 @@ async def ollama_async_streaming(
tool_calls=[
{
"id": f"call_{str(uuid.uuid4())}",
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
"function": {
"name": function_call["name"],
"arguments": json.dumps(function_call["arguments"]),
},
"type": "function",
}
],
@@ -433,7 +449,8 @@ async def ollama_async_streaming(
async for transformed_chunk in streamwrapper:
yield transformed_chunk
except Exception as e:
traceback.print_exc()
verbose_logger.error("LiteLLM.gemini(): Exception occured - {}".format(str(e)))
verbose_logger.debug(traceback.format_exc())
async def ollama_acompletion(
@@ -483,7 +500,10 @@ async def ollama_acompletion(
tool_calls=[
{
"id": f"call_{str(uuid.uuid4())}",
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
"function": {
"name": function_call["name"],
"arguments": json.dumps(function_call["arguments"]),
},
"type": "function",
}
],
@@ -491,7 +511,9 @@ async def ollama_acompletion(
model_response["choices"][0]["message"] = message
model_response["choices"][0]["finish_reason"] = "tool_calls"
else:
model_response["choices"][0]["message"]["content"] = response_json["message"]["content"]
model_response["choices"][0]["message"]["content"] = response_json[
"message"
]["content"]
model_response["created"] = int(time.time())
model_response["model"] = "ollama_chat/" + data["model"]
@@ -509,5 +531,9 @@ async def ollama_acompletion(
)
return model_response
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"LiteLLM.ollama_acompletion(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
raise e

View file

@@ -1,11 +1,12 @@
import os, types, traceback, copy
import json
from enum import Enum
import types
import traceback
import copy
import time
from typing import Callable, Optional
from litellm.utils import ModelResponse, get_secret, Choices, Message, Usage
from litellm.utils import ModelResponse, Choices, Message, Usage
import litellm
import sys, httpx
import httpx
from litellm import verbose_logger
class PalmError(Exception):
@@ -165,7 +166,10 @@ def completion(
choices_list.append(choice_obj)
model_response["choices"] = choices_list
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.llms.palm.py::completion(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
raise PalmError(
message=traceback.format_exc(), status_code=response.status_code
)

View file

@@ -826,7 +826,7 @@ def anthropic_messages_pt_xml(messages: list):
) # either string or none
if messages[msg_i].get(
"tool_calls", []
): # support assistant tool invoke convertion
): # support assistant tool invoke conversion
assistant_text += convert_to_anthropic_tool_invoke_xml( # type: ignore
messages[msg_i]["tool_calls"]
)
@@ -1217,7 +1217,7 @@ def anthropic_messages_pt(messages: list):
if messages[msg_i].get(
"tool_calls", []
): # support assistant tool invoke convertion
): # support assistant tool invoke conversion
assistant_content.extend(
convert_to_anthropic_tool_invoke(messages[msg_i]["tool_calls"])
)

View file

@@ -297,24 +297,29 @@ def _convert_gemini_role(role: str) -> Literal["user", "model"]:
def _process_gemini_image(image_url: str) -> PartType:
try:
if "gs://" in image_url:
# Case 1: Images with Cloud Storage URIs
if ".mp4" in image_url and "gs://" in image_url:
# Case 1: Videos with Cloud Storage URIs
part_mime = "video/mp4"
_file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
return PartType(file_data=_file_data)
elif ".pdf" in image_url and "gs://" in image_url:
# Case 2: PDF's with Cloud Storage URIs
part_mime = "application/pdf"
_file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
return PartType(file_data=_file_data)
elif "gs://" in image_url:
# Case 3: Images with Cloud Storage URIs
# The supported MIME types for images include image/png and image/jpeg.
part_mime = "image/png" if "png" in image_url else "image/jpeg"
_file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
return PartType(file_data=_file_data)
elif "https:/" in image_url:
# Case 2: Images with direct links
# Case 4: Images with direct links
image = _load_image_from_url(image_url)
_blob = BlobType(data=image.data, mime_type=image._mime_type)
return PartType(inline_data=_blob)
elif ".mp4" in image_url and "gs://" in image_url:
# Case 3: Videos with Cloud Storage URIs
part_mime = "video/mp4"
_file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
return PartType(file_data=_file_data)
elif "base64" in image_url:
# Case 4: Images with base64 encoding
# Case 5: Images with base64 encoding
import base64, re
# base 64 is passed as data:image/jpeg;base64,<base-64-encoded-image>
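The reordering in `_process_gemini_image` matters because the old code matched the generic `gs://` image branch first, so a Cloud Storage video like `gs://bucket/clip.mp4` was mislabeled `image/jpeg`. A simplified sketch of the new resolution order for `gs://` URIs:

def resolve_gcs_mime(uri: str) -> str:
    if ".mp4" in uri:
        return "video/mp4"
    if ".pdf" in uri:
        return "application/pdf"
    # images remain the fallback for all other Cloud Storage URIs
    return "image/png" if "png" in uri else "image/jpeg"

assert resolve_gcs_mime("gs://bucket/clip.mp4") == "video/mp4"
assert resolve_gcs_mime("gs://bucket/scan.pdf") == "application/pdf"
assert resolve_gcs_mime("gs://bucket/photo.png") == "image/png"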
@@ -390,7 +395,7 @@ def _gemini_convert_messages_with_history(messages: list) -> List[ContentType]:
assistant_content.extend(_parts)
elif messages[msg_i].get(
"tool_calls", []
): # support assistant tool invoke convertion
): # support assistant tool invoke conversion
assistant_content.extend(
convert_to_gemini_tool_call_invoke(messages[msg_i]["tool_calls"])
)
@@ -642,9 +647,9 @@ def completion(
prompt = " ".join(
[
message["content"]
message.get("content")
for message in messages
if isinstance(message["content"], str)
if isinstance(message.get("content", None), str)
]
)

View file

@@ -365,7 +365,10 @@ async def acompletion(
) # sets the logging event loop if the user does sync streaming (e.g. on proxy for sagemaker calls)
return response
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.acompletion(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
custom_llm_provider = custom_llm_provider or "openai"
raise exception_type(
model=model,
@@ -478,7 +481,10 @@ def mock_completion(
except Exception as e:
if isinstance(e, openai.APIError):
raise e
traceback.print_exc()
verbose_logger.error(
"litellm.mock_completion(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
raise Exception("Mock completion response failed")
@@ -4449,7 +4455,10 @@ async def ahealth_check(
response = {} # args like remaining ratelimit etc.
return response
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.ahealth_check(): Exception occured - {}".format(str(e))
)
verbose_logger.debug(traceback.format_exc())
stack_trace = traceback.format_exc()
if isinstance(stack_trace, str):
stack_trace = stack_trace[:1000]

View file

@@ -1,6 +1,7 @@
import json
import logging
from logging import Formatter
import sys
class JsonFormatter(Formatter):

View file

@@ -56,8 +56,10 @@ router_settings:
litellm_settings:
success_callback: ["langfuse"]
json_logs: true
general_settings:
alerting: ["email"]
key_management_system: "aws_kms"
key_management_settings:
hosted_keys: ["LITELLM_MASTER_KEY"]

View file

@@ -76,6 +76,17 @@ class LitellmUserRoles(str, enum.Enum):
return ui_labels.get(self.value, "")
class LitellmTableNames(str, enum.Enum):
"""
Enum for Table Names used by LiteLLM
"""
TEAM_TABLE_NAME: str = "LiteLLM_TeamTable"
USER_TABLE_NAME: str = "LiteLLM_UserTable"
KEY_TABLE_NAME: str = "LiteLLM_VerificationToken"
PROXY_MODEL_TABLE_NAME: str = "LiteLLM_ModelTable"
AlertType = Literal[
"llm_exceptions",
"llm_too_slow",
@@ -935,6 +946,7 @@ class KeyManagementSystem(enum.Enum):
AZURE_KEY_VAULT = "azure_key_vault"
AWS_SECRET_MANAGER = "aws_secret_manager"
LOCAL = "local"
AWS_KMS = "aws_kms"
class KeyManagementSettings(LiteLLMBase):
@@ -1276,6 +1288,22 @@ class LiteLLM_ErrorLogs(LiteLLMBase):
endTime: Union[str, datetime, None]
class LiteLLM_AuditLogs(LiteLLMBase):
id: str
updated_at: datetime
changed_by: str
action: Literal["created", "updated", "deleted"]
table_name: Literal[
LitellmTableNames.TEAM_TABLE_NAME,
LitellmTableNames.USER_TABLE_NAME,
LitellmTableNames.KEY_TABLE_NAME,
LitellmTableNames.PROXY_MODEL_TABLE_NAME,
]
object_id: str
before_value: Optional[Json] = None
updated_values: Optional[Json] = None
class LiteLLM_SpendLogs_ResponseObject(LiteLLMBase):
response: Optional[List[Union[LiteLLM_SpendLogs, Any]]] = None

View file

@@ -88,7 +88,7 @@ class _PROXY_AzureContentSafety(
verbose_proxy_logger.debug(
"Error in Azure Content-Safety: %s", traceback.format_exc()
)
traceback.print_exc()
verbose_proxy_logger.debug(traceback.format_exc())
raise
result = self._compute_result(response)
@@ -123,7 +123,12 @@ class _PROXY_AzureContentSafety(
except HTTPException as e:
raise e
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.hooks.azure_content_safety.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
async def async_post_call_success_hook(
self,

View file

@@ -94,7 +94,12 @@ class _PROXY_BatchRedisRequests(CustomLogger):
except HTTPException as e:
raise e
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.hooks.batch_redis_get.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
async def async_get_cache(self, *args, **kwargs):
"""

View file

@@ -1,13 +1,13 @@
# What this does?
## Checks if key is allowed to use the cache controls passed in to the completion() call
from typing import Optional
import litellm
from litellm import verbose_logger
from litellm.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
import json, traceback
import traceback
class _PROXY_CacheControlCheck(CustomLogger):
@@ -54,4 +54,9 @@ class _PROXY_CacheControlCheck(CustomLogger):
except HTTPException as e:
raise e
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.proxy.hooks.cache_control_check.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())

View file

@@ -1,10 +1,10 @@
from typing import Optional
from litellm import verbose_logger
import litellm
from litellm.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
import json, traceback
import traceback
class _PROXY_MaxBudgetLimiter(CustomLogger):
@@ -44,4 +44,9 @@ class _PROXY_MaxBudgetLimiter(CustomLogger):
except HTTPException as e:
raise e
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.proxy.hooks.max_budget_limiter.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())

View file

@@ -8,8 +8,8 @@
# Tell us how we can improve! - Krrish & Ishaan
from typing import Optional, Literal, Union
import litellm, traceback, sys, uuid, json
from typing import Optional, Union
import litellm, traceback, uuid, json # noqa: E401
from litellm.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
@@ -21,8 +21,8 @@ from litellm.utils import (
ImageResponse,
StreamingChoices,
)
from datetime import datetime
import aiohttp, asyncio
import aiohttp
import asyncio
class _OPTIONAL_PresidioPIIMasking(CustomLogger):
@@ -138,7 +138,12 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
else:
raise Exception(f"Invalid anonymizer response: {redacted_text}")
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.hooks.presidio_pii_masking.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
raise e
async def async_pre_call_hook(

View file

@@ -204,7 +204,12 @@ class _OPTIONAL_PromptInjectionDetection(CustomLogger):
return e.detail["error"]
raise e
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
async def async_moderation_hook(
self,

View file

@@ -23,4 +23,5 @@ general_settings:
master_key: sk-1234
litellm_settings:
callbacks: ["otel"]
callbacks: ["otel"]
store_audit_logs: true
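Per the checks added later in this commit, the `store_audit_logs` config line maps to a module-level flag; the programmatic equivalent is:

import litellm

litellm.store_audit_logs = True  # gates every create_audit_log_for_update() call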

View file

@@ -103,6 +103,7 @@ from litellm.proxy.utils import (
update_spend,
encrypt_value,
decrypt_value,
get_error_message_str,
)
from litellm import (
CreateBatchRequest,
@@ -112,7 +113,10 @@ from litellm import (
CreateFileRequest,
)
from litellm.proxy.secret_managers.google_kms import load_google_kms
from litellm.proxy.secret_managers.aws_secret_manager import load_aws_secret_manager
from litellm.proxy.secret_managers.aws_secret_manager import (
load_aws_secret_manager,
load_aws_kms,
)
import pydantic
from litellm.proxy._types import *
from litellm.caching import DualCache, RedisCache
@@ -125,7 +129,10 @@ from litellm.router import (
AssistantsTypedDict,
)
from litellm.router import ModelInfo as RouterModelInfo
from litellm._logging import verbose_router_logger, verbose_proxy_logger
from litellm._logging import (
verbose_router_logger,
verbose_proxy_logger,
)
from litellm.proxy.auth.handle_jwt import JWTHandler
from litellm.proxy.auth.litellm_license import LicenseCheck
from litellm.proxy.auth.model_checks import (
@@ -1471,7 +1478,12 @@ async def user_api_key_auth(
else:
raise Exception()
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.user_api_key_auth(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, litellm.BudgetExceededError):
raise ProxyException(
message=e.message, type="auth_error", param=None, code=400
@@ -2736,10 +2748,12 @@ class ProxyConfig:
load_google_kms(use_google_kms=True)
elif (
key_management_system
== KeyManagementSystem.AWS_SECRET_MANAGER.value
== KeyManagementSystem.AWS_SECRET_MANAGER.value # noqa: F405
):
### LOAD FROM AWS SECRET MANAGER ###
load_aws_secret_manager(use_aws_secret_manager=True)
elif key_management_system == KeyManagementSystem.AWS_KMS.value:
load_aws_kms(use_aws_kms=True)
else:
raise ValueError("Invalid Key Management System selected")
key_management_settings = general_settings.get(
@@ -2773,6 +2787,7 @@ class ProxyConfig:
master_key = general_settings.get(
"master_key", litellm.get_secret("LITELLM_MASTER_KEY", None)
)
if master_key and master_key.startswith("os.environ/"):
master_key = litellm.get_secret(master_key)
if not isinstance(master_key, str):
@@ -3476,7 +3491,12 @@ async def generate_key_helper_fn(
)
key_data["token_id"] = getattr(create_key_response, "token", None)
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.generate_key_helper_fn(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise e
raise HTTPException(
@@ -3515,7 +3535,12 @@ async def delete_verification_token(tokens: List, user_id: Optional[str] = None)
else:
raise Exception("DB not connected. prisma_client is None")
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.delete_verification_token(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
raise e
return deleted_tokens
@@ -3676,7 +3701,12 @@ async def async_assistants_data_generator(
done_message = "[DONE]"
yield f"data: {done_message}\n\n"
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.async_assistants_data_generator(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict,
original_exception=e,
@@ -3686,9 +3716,6 @@ async def async_assistants_data_generator(
f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`"
)
router_model_names = llm_router.model_names if llm_router is not None else []
if user_debug:
traceback.print_exc()
if isinstance(e, HTTPException):
raise e
else:
@@ -3728,7 +3755,12 @@ async def async_data_generator(
done_message = "[DONE]"
yield f"data: {done_message}\n\n"
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.async_data_generator(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict,
original_exception=e,
@@ -3738,8 +3770,6 @@ async def async_data_generator(
f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`"
)
router_model_names = llm_router.model_names if llm_router is not None else []
if user_debug:
traceback.print_exc()
if isinstance(e, HTTPException):
raise e
@@ -3800,6 +3830,18 @@ def on_backoff(details):
verbose_proxy_logger.debug("Backing off... this was attempt # %s", details["tries"])
def giveup(e):
result = not (
isinstance(e, ProxyException)
and getattr(e, "message", None) is not None
and isinstance(e.message, str)
and "Max parallel request limit reached" in e.message
)
if result:
verbose_proxy_logger.info(json.dumps({"event": "giveup", "exception": str(e)}))
return result
@router.on_event("startup")
async def startup_event():
global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time, litellm_proxy_admin_name, db_writer_client, store_model_in_db
@@ -4084,12 +4126,8 @@ def model_list(
max_tries=litellm.num_retries or 3, # maximum number of retries
max_time=litellm.request_timeout or 60, # maximum total time to retry for
on_backoff=on_backoff, # specifying the function to call on backoff
giveup=lambda e: not (
isinstance(e, ProxyException)
and getattr(e, "message", None) is not None
and isinstance(e.message, str)
and "Max parallel request limit reached" in e.message
), # the result of the logical expression is on the second position
giveup=giveup,
logger=verbose_proxy_logger,
)
async def chat_completion(
request: Request,
@@ -4098,6 +4136,7 @@ async def chat_completion(
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
global general_settings, user_debug, proxy_logging_obj, llm_model_list
data = {}
try:
body = await request.body()
@@ -4386,7 +4425,12 @@ async def chat_completion(
return _chat_response
except Exception as e:
data["litellm_status"] = "fail" # used for alerting
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.chat_completion(): Exception occured - {}".format(
get_error_message_str(e=e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
@@ -4397,8 +4441,6 @@ async def chat_completion(
litellm_debug_info,
)
router_model_names = llm_router.model_names if llm_router is not None else []
if user_debug:
traceback.print_exc()
if isinstance(e, HTTPException):
raise ProxyException(
@@ -4630,15 +4672,12 @@ async def completion(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
verbose_proxy_logger.debug("EXCEPTION RAISED IN PROXY MAIN.PY")
litellm_debug_info = getattr(e, "litellm_debug_info", "")
verbose_proxy_logger.debug(
"\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
e,
litellm_debug_info,
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.completion(): Exception occured - {}".format(
str(e)
)
)
traceback.print_exc()
error_traceback = traceback.format_exc()
verbose_proxy_logger.debug(traceback.format_exc())
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -4848,7 +4887,12 @@ async def embeddings(
e,
litellm_debug_info,
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.embeddings(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e)),
@@ -5027,7 +5071,12 @@ async def image_generation(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.image_generation(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e)),
@@ -5205,7 +5254,12 @@ async def audio_speech(
)
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.audio_speech(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
raise e
@@ -5394,7 +5448,12 @@ async def audio_transcriptions(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.audio_transcription(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -5403,7 +5462,6 @@ async def audio_transcriptions(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -5531,7 +5589,12 @@ async def get_assistants(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.get_assistants(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -5540,7 +5603,6 @@ async def get_assistants(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -5660,7 +5722,12 @@ async def create_threads(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.create_threads(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -5669,7 +5736,6 @@ async def create_threads(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -5788,7 +5854,12 @@ async def get_thread(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.get_thread(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -5797,7 +5868,6 @@ async def get_thread(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -5919,7 +5989,12 @@ async def add_messages(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.add_messages(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -5928,7 +6003,6 @@ async def add_messages(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -6046,7 +6120,12 @@ async def get_messages(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.get_messages(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -6055,7 +6134,6 @@ async def get_messages(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -6187,7 +6265,12 @@ async def run_thread(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.run_thread(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -6196,7 +6279,6 @@ async def run_thread(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -6335,7 +6417,12 @@ async def create_batch(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.create_batch(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -6344,7 +6431,6 @@ async def create_batch(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -6478,7 +6564,12 @@ async def retrieve_batch(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.retrieve_batch(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -6631,7 +6722,12 @@ async def create_file(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.create_file(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e.detail)),
@@ -6640,7 +6736,6 @@ async def create_file(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -6816,7 +6911,12 @@ async def moderations(
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
)
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.moderations(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "message", str(e)),
@@ -6825,7 +6925,6 @@ async def moderations(
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
)
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}"
raise ProxyException(
message=getattr(e, "message", error_msg),
@@ -7115,9 +7214,33 @@ async def generate_key_fn(
)
)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_updated_values = json.dumps(response)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=user_api_key_dict.user_id
or litellm_proxy_admin_name,
table_name=LitellmTableNames.KEY_TABLE_NAME,
object_id=response.get("token_id", ""),
action="created",
updated_values=_updated_values,
before_value=None,
)
)
)
return GenerateKeyResponse(**response)
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.generate_key_fn(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -7138,7 +7261,11 @@ async def generate_key_fn(
@router.post(
"/key/update", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
)
async def update_key_fn(request: Request, data: UpdateKeyRequest):
async def update_key_fn(
request: Request,
data: UpdateKeyRequest,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
Update an existing key
"""
@@ -7150,6 +7277,16 @@ async def update_key_fn(request: Request, data: UpdateKeyRequest):
if prisma_client is None:
raise Exception("Not connected to DB!")
existing_key_row = await prisma_client.get_data(
token=data.key, table_name="key", query_type="find_unique"
)
if existing_key_row is None:
raise HTTPException(
status_code=404,
detail={"error": f"Team not found, passed team_id={data.team_id}"},
)
# get non default values for key
non_default_values = {}
for k, v in data_json.items():
@@ -7176,6 +7313,29 @@ async def update_key_fn(request: Request, data: UpdateKeyRequest):
hashed_token = hash_token(key)
user_api_key_cache.delete_cache(hashed_token)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_updated_values = json.dumps(data_json)
_before_value = existing_key_row.json(exclude_none=True)
_before_value = json.dumps(_before_value)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=user_api_key_dict.user_id
or litellm_proxy_admin_name,
table_name=LitellmTableNames.KEY_TABLE_NAME,
object_id=data.key,
action="updated",
updated_values=_updated_values,
before_value=_before_value,
)
)
)
return {"key": key, **response["data"]}
# update based on remaining passed in values
except Exception as e:
@@ -7238,6 +7398,34 @@ async def delete_key_fn(
):
user_id = None # unless they're admin
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
# we do this after the first for loop, since first for loop is for validation. we only want this inserted after validation passes
if litellm.store_audit_logs is True:
# make an audit log for each key deleted
for key in data.keys:
key_row = await prisma_client.get_data( # type: ignore
token=key, table_name="key", query_type="find_unique"
)
key_row = key_row.json(exclude_none=True)
_key_row = json.dumps(key_row)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=user_api_key_dict.user_id
or litellm_proxy_admin_name,
table_name=LitellmTableNames.KEY_TABLE_NAME,
object_id=key,
action="deleted",
updated_values="{}",
before_value=_key_row,
)
)
)
number_deleted_keys = await delete_verification_token(
tokens=keys, user_id=user_id
)
@@ -9507,7 +9695,12 @@ async def user_info(
}
return response_data
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.user_info(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -9602,7 +9795,12 @@ async def user_update(data: UpdateUserRequest):
return response
# update based on remaining passed in values
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.user_update(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -9655,7 +9853,12 @@ async def user_request_model(request: Request):
return {"status": "success"}
# update based on remaining passed in values
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.user_request_model(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -9697,7 +9900,12 @@ async def user_get_requests():
return {"requests": response}
# update based on remaining passed in values
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.user_get_requests(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -10087,7 +10295,12 @@ async def update_end_user(
# update based on remaining passed in values
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.update_end_user(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Internal Server Error({str(e)})"),
@@ -10171,7 +10384,12 @@ async def delete_end_user(
# update based on remaining passed in values
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.delete_end_user(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Internal Server Error({str(e)})"),
@@ -10365,12 +10583,65 @@ async def new_team(
}
},
)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_updated_values = complete_team_data.json(exclude_none=True)
_updated_values = json.dumps(_updated_values)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=user_api_key_dict.user_id or litellm_proxy_admin_name,
table_name=LitellmTableNames.TEAM_TABLE_NAME,
object_id=data.team_id,
action="created",
updated_values=_updated_values,
before_value=None,
)
)
)
try:
return team_row.model_dump()
except Exception as e:
return team_row.dict()
async def create_audit_log_for_update(request_data: LiteLLM_AuditLogs):
if premium_user is not True:
return
if litellm.store_audit_logs is not True:
return
if prisma_client is None:
raise Exception("prisma_client is None, no DB connected")
verbose_proxy_logger.debug("creating audit log for %s", request_data)
if isinstance(request_data.updated_values, dict):
request_data.updated_values = json.dumps(request_data.updated_values)
if isinstance(request_data.before_value, dict):
request_data.before_value = json.dumps(request_data.before_value)
_request_data = request_data.dict(exclude_none=True)
try:
await prisma_client.db.litellm_auditlog.create(
data={
**_request_data, # type: ignore
}
)
except Exception as e:
# [Non-Blocking Exception. Do not allow blocking LLM API call]
verbose_proxy_logger.error(f"Failed Creating audit log {e}")
return
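A hedged call-site sketch mirroring the key/team endpoints above (it assumes the `LiteLLM_AuditLogs` and `LitellmTableNames` types imported earlier in this file; all values are placeholders). Audit writes are scheduled with `asyncio.create_task` so a slow or failing DB insert never blocks the API response:

import asyncio
import json
import uuid
from datetime import datetime, timezone

async def example_audit_write():
    await create_audit_log_for_update(
        request_data=LiteLLM_AuditLogs(
            id=str(uuid.uuid4()),
            updated_at=datetime.now(timezone.utc),
            changed_by="admin",  # placeholder user id
            table_name=LitellmTableNames.KEY_TABLE_NAME,
            object_id="hashed-token-id",  # placeholder
            action="updated",
            updated_values=json.dumps({"max_budget": 100.0}),
            before_value=json.dumps({"max_budget": 50.0}),
        )
    )

# in request handlers this is scheduled, not awaited:
# asyncio.create_task(example_audit_write())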
@router.post(
"/team/update", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
)
@@ -10443,6 +10714,27 @@ async def update_team(
team_id=data.team_id,
)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
if litellm.store_audit_logs is True:
_before_value = existing_team_row.json(exclude_none=True)
_before_value = json.dumps(_before_value)
_after_value: str = json.dumps(updated_kv)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=user_api_key_dict.user_id or litellm_proxy_admin_name,
table_name=LitellmTableNames.TEAM_TABLE_NAME,
object_id=data.team_id,
action="updated",
updated_values=_after_value,
before_value=_before_value,
)
)
)
return team_row
@@ -10714,6 +11006,35 @@ async def delete_team(
detail={"error": f"Team not found, passed team_id={team_id}"},
)
# Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
# we do this after the first for loop, since first for loop is for validation. we only want this inserted after validation passes
if litellm.store_audit_logs is True:
# make an audit log for each team deleted
for team_id in data.team_ids:
team_row = await prisma_client.get_data( # type: ignore
team_id=team_id, table_name="team", query_type="find_unique"
)
_team_row = team_row.json(exclude_none=True)
asyncio.create_task(
create_audit_log_for_update(
request_data=LiteLLM_AuditLogs(
id=str(uuid.uuid4()),
updated_at=datetime.now(timezone.utc),
changed_by=user_api_key_dict.user_id
or litellm_proxy_admin_name,
table_name=LitellmTableNames.TEAM_TABLE_NAME,
object_id=team_id,
action="deleted",
updated_values="{}",
before_value=_team_row,
)
)
)
# End of Audit logging
## DELETE ASSOCIATED KEYS
await prisma_client.delete_data(team_id_list=data.team_ids, table_name="key")
## DELETE TEAMS
@@ -11371,7 +11692,12 @@ async def add_new_model(
return model_response
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.add_new_model(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -11485,7 +11811,12 @@ async def update_model(
return model_response
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.update_model(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -13719,7 +14050,12 @@ async def update_config(config_info: ConfigYAML):
return {"message": "Config updated successfully"}
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.update_config(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -14192,7 +14528,12 @@ async def get_config():
"available_callbacks": all_available_callbacks,
}
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.get_config(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -14443,7 +14784,12 @@ async def health_services_endpoint(
}
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.health_services_endpoint(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -14522,7 +14868,12 @@ async def health_endpoint(
"unhealthy_count": len(unhealthy_endpoints),
}
except Exception as e:
traceback.print_exc()
verbose_proxy_logger.error(
"litellm.proxy.proxy_server.py::health_endpoint(): Exception occured - {}".format(
str(e)
)
)
verbose_proxy_logger.debug(traceback.format_exc())
raise e

View file

@@ -243,4 +243,16 @@ model LiteLLM_InvitationLink {
liteLLM_user_table_user LiteLLM_UserTable @relation("UserId", fields: [user_id], references: [user_id])
liteLLM_user_table_created LiteLLM_UserTable @relation("CreatedBy", fields: [created_by], references: [user_id])
liteLLM_user_table_updated LiteLLM_UserTable @relation("UpdatedBy", fields: [updated_by], references: [user_id])
}
model LiteLLM_AuditLog {
id String @id @default(uuid())
updated_at DateTime @default(now())
changed_by String // user or system that performed the action
action String // create, update, delete
table_name String // one of LitellmTableNames.TEAM_TABLE_NAME, LitellmTableNames.USER_TABLE_NAME, LitellmTableNames.KEY_TABLE_NAME, LitellmTableNames.PROXY_MODEL_TABLE_NAME,
object_id String // id of the object being audited. This can be the key id, team id, user id, model id
before_value Json? // value of the row before change
updated_values Json? // value of the row after change
}

View file

@@ -8,7 +8,8 @@ Requires:
* `pip install boto3>=1.28.57`
"""
import litellm, os
import litellm
import os
from typing import Optional
from litellm.proxy._types import KeyManagementSystem
@@ -38,3 +39,21 @@ def load_aws_secret_manager(use_aws_secret_manager: Optional[bool]):
except Exception as e:
raise e
def load_aws_kms(use_aws_kms: Optional[bool]):
if use_aws_kms is None or use_aws_kms is False:
return
try:
import boto3
validate_environment()
# Create an AWS KMS client
kms_client = boto3.client("kms", region_name=os.getenv("AWS_REGION_NAME"))
litellm.secret_manager_client = kms_client
litellm._key_management_system = KeyManagementSystem.AWS_KMS
except Exception as e:
raise e
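A hedged setup sketch for the new KMS path: with the region set, `load_aws_kms` swaps the global secret-manager client for a boto3 KMS client. The region value is a placeholder, and this requires `boto3` plus valid AWS credentials in the environment:

import os
import litellm

os.environ["AWS_REGION_NAME"] = "us-west-2"  # placeholder region
load_aws_kms(use_aws_kms=True)

assert litellm._key_management_system == KeyManagementSystem.AWS_KMS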

View file

@@ -2709,13 +2709,15 @@ def decrypt_value(value: bytes, master_key: str) -> str:
# LiteLLM Admin UI - Non SSO Login
html_form = """
url_to_redirect_to = os.getenv("PROXY_BASE_URL", "")
url_to_redirect_to += "/login"
html_form = f"""
<!DOCTYPE html>
<html>
<head>
<title>LiteLLM Login</title>
<style>
body {
body {{
font-family: Arial, sans-serif;
background-color: #f4f4f4;
margin: 0;
@@ -2724,42 +2726,42 @@ html_form = """
justify-content: center;
align-items: center;
height: 100vh;
}
}}
form {
form {{
background-color: #fff;
padding: 20px;
border-radius: 8px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}
}}
label {
label {{
display: block;
margin-bottom: 8px;
}
}}
input {
input {{
width: 100%;
padding: 8px;
margin-bottom: 16px;
box-sizing: border-box;
border: 1px solid #ccc;
border-radius: 4px;
}
}}
input[type="submit"] {
input[type="submit"] {{
background-color: #4caf50;
color: #fff;
cursor: pointer;
}
}}
input[type="submit"]:hover {
input[type="submit"]:hover {{
background-color: #45a049;
}
}}
</style>
</head>
<body>
<form action="/login" method="post">
<form action="{url_to_redirect_to}" method="post">
<h2>LiteLLM Login</h2>
<p>By default Username is "admin" and Password is your set LiteLLM Proxy `MASTER_KEY`</p>
@@ -2771,8 +2773,6 @@ html_form = """
<input type="password" id="password" name="password" required>
<input type="submit" value="Submit">
</form>
</body>
</html>
"""
@@ -2837,3 +2837,17 @@ missing_keys_html_form = """
</body>
</html>
"""
def get_error_message_str(e: Exception) -> str:
error_message = ""
if isinstance(e, HTTPException):
if isinstance(e.detail, str):
error_message = e.detail
elif isinstance(e.detail, dict):
error_message = json.dumps(e.detail)
else:
error_message = str(e)
else:
error_message = str(e)
return error_message
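A quick check of the helper above: `HTTPException` detail dicts are JSON-encoded rather than passed through `str()`, so structured error bodies survive intact in the new `verbose_proxy_logger.error` lines:

from fastapi import HTTPException

exc = HTTPException(status_code=400, detail={"error": "Invalid model"})
assert get_error_message_str(exc) == '{"error": "Invalid model"}'
assert get_error_message_str(ValueError("bad input")) == "bad input"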

View file

@@ -220,8 +220,6 @@ class Router:
[]
) # names of models under litellm_params. ex. azure/chatgpt-v-2
self.deployment_latency_map = {}
### SCHEDULER ###
self.scheduler = Scheduler(polling_interval=polling_interval)
### CACHING ###
cache_type: Literal["local", "redis"] = "local" # default to an in-memory cache
redis_cache = None
@@ -259,6 +257,10 @@ class Router:
redis_cache=redis_cache, in_memory_cache=InMemoryCache()
) # use a dual cache (Redis+In-Memory) for tracking cooldowns, usage, etc.
### SCHEDULER ###
self.scheduler = Scheduler(
polling_interval=polling_interval, redis_cache=redis_cache
)
self.default_deployment = None # use this to track the users default deployment, when they want to use model = *
self.default_max_parallel_requests = default_max_parallel_requests
@@ -2096,8 +2098,8 @@ class Router:
except Exception as e:
raise e
except Exception as e:
verbose_router_logger.debug(f"An exception occurred - {str(e)}")
traceback.print_exc()
verbose_router_logger.error(f"An exception occurred - {str(e)}")
verbose_router_logger.debug(traceback.format_exc())
raise original_exception
async def async_function_with_retries(self, *args, **kwargs):
@@ -4048,6 +4050,12 @@ class Router:
for idx in reversed(invalid_model_indices):
_returned_deployments.pop(idx)
## ORDER FILTERING ## -> if user set 'order' in deployments, return deployments with lowest order (e.g. order=1 > order=2)
if len(_returned_deployments) > 0:
_returned_deployments = litellm.utils._get_order_filtered_deployments(
_returned_deployments
)
return _returned_deployments
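A sketch of the order-filter semantics assumed by the new block: when deployments set `order` in their litellm_params, only the lowest-order deployments survive routing. Exact tie-breaking and the handling of untagged deployments live in `litellm.utils._get_order_filtered_deployments`; this standalone approximation treats a missing `order` as lowest priority:

deployments = [
    {"model_name": "gpt-4", "litellm_params": {"order": 2, "model": "azure/gpt-4-eu"}},
    {"model_name": "gpt-4", "litellm_params": {"order": 1, "model": "azure/gpt-4-us"}},
]
lowest = min(d["litellm_params"].get("order", float("inf")) for d in deployments)
filtered = [d for d in deployments if d["litellm_params"].get("order") == lowest]
assert filtered[0]["litellm_params"]["model"] == "azure/gpt-4-us"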
def _common_checks_available_deployment(

View file

@@ -1,11 +1,9 @@
#### What this does ####
# picks based on response time (for streaming, this is time to first token)
from pydantic import BaseModel, Extra, Field, root_validator
import os, requests, random # type: ignore
from pydantic import BaseModel
from typing import Optional, Union, List, Dict
from datetime import datetime, timedelta
import random
from litellm import verbose_logger
import traceback
from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
@@ -119,7 +117,12 @@ class LowestCostLoggingHandler(CustomLogger):
if self.test_flag:
self.logged_success += 1
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@@ -201,7 +204,12 @@ class LowestCostLoggingHandler(CustomLogger):
if self.test_flag:
self.logged_success += 1
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass
async def async_get_available_deployments(

View file

@@ -1,16 +1,16 @@
#### What this does ####
# picks based on response time (for streaming, this is time to first token)
from pydantic import BaseModel, Extra, Field, root_validator # type: ignore
import dotenv, os, requests, random # type: ignore
from pydantic import BaseModel
import random
from typing import Optional, Union, List, Dict
from datetime import datetime, timedelta
import random
import traceback
from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm import ModelResponse
from litellm import token_counter
import litellm
from litellm import verbose_logger
class LiteLLMBase(BaseModel):
@@ -165,7 +165,12 @@ class LowestLatencyLoggingHandler(CustomLogger):
if self.test_flag:
self.logged_success += 1
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
@@ -229,7 +234,12 @@ class LowestLatencyLoggingHandler(CustomLogger):
# do nothing if it's not a timeout error
return
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@ -352,7 +362,12 @@ class LowestLatencyLoggingHandler(CustomLogger):
if self.test_flag:
self.logged_success += 1
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.router_strategy.lowest_latency.py::async_log_success_event(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass
def get_available_deployments(

View file

@ -11,6 +11,7 @@ from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_router_logger
from litellm.utils import print_verbose
class LiteLLMBase(BaseModel):
"""
Implements default functions, all pydantic objects should have.
@ -23,16 +24,20 @@ class LiteLLMBase(BaseModel):
# if using pydantic v1
return self.dict()
class RoutingArgs(LiteLLMBase):
ttl: int = 1 * 60 # 1min (RPM/TPM expire key)
ttl: int = 1 * 60 # 1min (RPM/TPM expire key)
class LowestTPMLoggingHandler(CustomLogger):
test_flag: bool = False
logged_success: int = 0
logged_failure: int = 0
default_cache_time_seconds: int = 1 * 60 * 60 # 1 hour
def __init__(self, router_cache: DualCache, model_list: list, routing_args: dict = {}):
def __init__(
self, router_cache: DualCache, model_list: list, routing_args: dict = {}
):
self.router_cache = router_cache
self.model_list = model_list
self.routing_args = RoutingArgs(**routing_args)
@ -72,19 +77,28 @@ class LowestTPMLoggingHandler(CustomLogger):
request_count_dict = self.router_cache.get_cache(key=tpm_key) or {}
request_count_dict[id] = request_count_dict.get(id, 0) + total_tokens
self.router_cache.set_cache(key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl)
self.router_cache.set_cache(
key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl
)
## RPM
request_count_dict = self.router_cache.get_cache(key=rpm_key) or {}
request_count_dict[id] = request_count_dict.get(id, 0) + 1
self.router_cache.set_cache(key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl)
self.router_cache.set_cache(
key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl
)
### TESTING ###
if self.test_flag:
self.logged_success += 1
except Exception as e:
traceback.print_exc()
verbose_router_logger.error(
"litellm.router_strategy.lowest_tpm_rpm.py::async_log_success_event(): Exception occured - {}".format(
str(e)
)
)
verbose_router_logger.debug(traceback.format_exc())
pass
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@ -123,19 +137,28 @@ class LowestTPMLoggingHandler(CustomLogger):
request_count_dict = self.router_cache.get_cache(key=tpm_key) or {}
request_count_dict[id] = request_count_dict.get(id, 0) + total_tokens
self.router_cache.set_cache(key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl)
self.router_cache.set_cache(
key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl
)
## RPM
request_count_dict = self.router_cache.get_cache(key=rpm_key) or {}
request_count_dict[id] = request_count_dict.get(id, 0) + 1
self.router_cache.set_cache(key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl)
self.router_cache.set_cache(
key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl
)
### TESTING ###
if self.test_flag:
self.logged_success += 1
except Exception as e:
traceback.print_exc()
verbose_router_logger.error(
"litellm.router_strategy.lowest_tpm_rpm.py::async_log_success_event(): Exception occured - {}".format(
str(e)
)
)
verbose_router_logger.debug(traceback.format_exc())
pass
def get_available_deployments(
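Both success handlers above follow the same pattern: per-minute usage dicts written back with a short TTL, so counters expire on their own instead of needing cleanup. A rough standalone sketch of that rolling-window bookkeeping, using a plain dict in place of `DualCache` (illustrative only):

```python
# Rolling-window usage tracking: one dict per (model group, minute), written
# back with a TTL so stale counters expire instead of needing cleanup.
import time

TTL = 60  # seconds; mirrors RoutingArgs.ttl

_cache: dict = {}  # key -> (expires_at, value); stand-in for DualCache


def set_cache(key: str, value, ttl: int = TTL) -> None:
    _cache[key] = (time.time() + ttl, value)


def get_cache(key: str):
    entry = _cache.get(key)
    if entry is None or entry[0] < time.time():
        return None
    return entry[1]


def log_usage(model_group: str, deployment_id: str, total_tokens: int) -> None:
    minute = time.strftime("%H-%M")
    tpm_key = f"{model_group}:tpm:{minute}"
    rpm_key = f"{model_group}:rpm:{minute}"
    for key, delta in ((tpm_key, total_tokens), (rpm_key, 1)):
        counts = get_cache(key) or {}
        counts[deployment_id] = counts.get(deployment_id, 0) + delta
        set_cache(key, counts, ttl=TTL)


log_usage("gpt-3.5-turbo", "deployment-1", total_tokens=42)
```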

View file

@ -1,19 +1,19 @@
#### What this does ####
# identifies lowest tpm deployment
from pydantic import BaseModel
import dotenv, os, requests, random
import random
from typing import Optional, Union, List, Dict
import datetime as datetime_og
from datetime import datetime
import traceback, asyncio, httpx
import traceback
import httpx
import litellm
from litellm import token_counter
from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_router_logger
from litellm._logging import verbose_router_logger, verbose_logger
from litellm.utils import print_verbose, get_utc_datetime
from litellm.types.router import RouterErrors
class LiteLLMBase(BaseModel):
"""
Implements default functions, all pydantic objects should have.
@ -22,12 +22,14 @@ class LiteLLMBase(BaseModel):
def json(self, **kwargs):
try:
return self.model_dump() # noqa
except:
except Exception as e:
# if using pydantic v1
return self.dict()
class RoutingArgs(LiteLLMBase):
ttl: int = 1 * 60 # 1min (RPM/TPM expire key)
ttl: int = 1 * 60 # 1min (RPM/TPM expire key)
class LowestTPMLoggingHandler_v2(CustomLogger):
"""
@ -47,7 +49,9 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
logged_failure: int = 0
default_cache_time_seconds: int = 1 * 60 * 60 # 1 hour
def __init__(self, router_cache: DualCache, model_list: list, routing_args: dict = {}):
def __init__(
self, router_cache: DualCache, model_list: list, routing_args: dict = {}
):
self.router_cache = router_cache
self.model_list = model_list
self.routing_args = RoutingArgs(**routing_args)
@ -104,7 +108,9 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
)
else:
# if local result below limit, check redis ## prevent unnecessary redis checks
result = self.router_cache.increment_cache(key=rpm_key, value=1, ttl=self.routing_args.ttl)
result = self.router_cache.increment_cache(
key=rpm_key, value=1, ttl=self.routing_args.ttl
)
if result is not None and result > deployment_rpm:
raise litellm.RateLimitError(
message="Deployment over defined rpm limit={}. current usage={}".format(
@ -244,12 +250,19 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
# update cache
## TPM
self.router_cache.increment_cache(key=tpm_key, value=total_tokens, ttl=self.routing_args.ttl)
self.router_cache.increment_cache(
key=tpm_key, value=total_tokens, ttl=self.routing_args.ttl
)
### TESTING ###
if self.test_flag:
self.logged_success += 1
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@ -295,7 +308,12 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
if self.test_flag:
self.logged_success += 1
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
pass
def _common_checks_available_deployment(
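The v2 handler's RPM guard works by increment-then-compare: bump the shared per-minute counter first, then reject the call if the returned total exceeds the deployment's limit. This stays race-free as long as the backend increment is atomic (e.g. Redis `INCR`). A hedged sketch of the pattern with illustrative names, not the actual litellm API:

```python
# Increment-then-compare rate limiting: bump the shared counter first, then
# reject if the new total is over the limit. With an atomic backend increment
# two concurrent callers can never both sneak under the limit.
import time

_counters: dict = {}  # stand-in for a shared cache


def increment_cache(key: str, value: int = 1) -> int:
    _counters[key] = _counters.get(key, 0) + value
    return _counters[key]


def pre_call_rpm_check(deployment_id: str, deployment_rpm: int) -> None:
    rpm_key = f"{deployment_id}:rpm:{time.strftime('%H-%M')}"
    result = increment_cache(rpm_key, 1)
    if result > deployment_rpm:
        # the real handler raises litellm.RateLimitError here
        raise RuntimeError(
            f"Deployment over defined rpm limit={deployment_rpm}. current usage={result}"
        )


pre_call_rpm_check("deployment-1", deployment_rpm=100)
```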

View file

@ -1,13 +1,14 @@
import heapq, time
import heapq
from pydantic import BaseModel
from typing import Optional
import enum
from litellm.caching import DualCache
from litellm.caching import DualCache, RedisCache
from litellm import print_verbose
class SchedulerCacheKeys(enum.Enum):
queue = "scheduler:queue"
default_in_memory_ttl = 5 # cache queue in-memory for 5s when redis cache available
class DefaultPriorities(enum.Enum):
@ -25,18 +26,24 @@ class FlowItem(BaseModel):
class Scheduler:
cache: DualCache
def __init__(self, polling_interval: Optional[float] = None):
def __init__(
self,
polling_interval: Optional[float] = None,
redis_cache: Optional[RedisCache] = None,
):
"""
polling_interval: float or null - frequency of polling the queue. Default is 0.03s (30ms).
"""
self.queue: list = []
self.cache = DualCache()
default_in_memory_ttl: Optional[float] = None
if redis_cache is not None:
# if redis-cache available frequently poll that instead of using in-memory.
default_in_memory_ttl = SchedulerCacheKeys.default_in_memory_ttl.value
self.cache = DualCache(
redis_cache=redis_cache, default_in_memory_ttl=default_in_memory_ttl
)
self.polling_interval = polling_interval or 0.03 # default to 0.03s (30ms)
def update_variables(self, cache: Optional[DualCache] = None):
if cache is not None:
self.cache = cache
async def add_request(self, request: FlowItem):
# We use the priority directly, as lower values indicate higher priority
# get the queue
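Usage of the new constructor, for context: with a `RedisCache` the queue is shared across workers and only held in memory for the 5s TTL defined in `SchedulerCacheKeys`; without one it stays purely in-process. A rough sketch (import paths and `RedisCache` arguments are assumptions, not taken from this diff):

```python
# Assumed import paths, based on this diff's file layout.
from litellm.caching import RedisCache
from litellm.scheduler import Scheduler

# Single process: the queue lives purely in the in-memory DualCache.
local_scheduler = Scheduler(polling_interval=0.03)

# Multiple workers: the queue is shared through Redis, and each worker's
# in-memory copy expires after SchedulerCacheKeys.default_in_memory_ttl (5s).
redis_cache = RedisCache(host="localhost", port=6379, password=None)
shared_scheduler = Scheduler(polling_interval=0.03, redis_cache=redis_cache)
```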

View file

@ -198,7 +198,11 @@ async def test_aarun_thread_litellm(sync_mode, provider, is_streaming):
)
assert isinstance(messages.data[0], Message)
else:
pytest.fail("An unexpected error occurred when running the thread")
pytest.fail(
"An unexpected error occurred when running the thread, {}".format(
run
)
)
else:
added_message = await litellm.a_add_message(**data)
@ -226,4 +230,8 @@ async def test_aarun_thread_litellm(sync_mode, provider, is_streaming):
)
assert isinstance(messages.data[0], Message)
else:
pytest.fail("An unexpected error occurred when running the thread")
pytest.fail(
"An unexpected error occurred when running the thread, {}".format(
run
)
)

View file

@ -2169,6 +2169,7 @@ def test_completion_azure_key_completion_arg():
logprobs=True,
max_tokens=10,
)
print(f"response: {response}")
print("Hidden Params", response._hidden_params)
@ -2544,6 +2545,8 @@ def test_replicate_custom_prompt_dict():
"content": "what is yc write 1 paragraph",
}
],
mock_response="Hello world",
mock_response="hello world",
repetition_penalty=0.1,
num_retries=3,
)

View file

@ -76,7 +76,7 @@ def test_image_generation_azure_dall_e_3():
)
print(f"response: {response}")
assert len(response.data) > 0
except litellm.RateLimitError as e:
except litellm.InternalServerError as e:
pass
except litellm.ContentPolicyViolationError:
pass # OpenAI randomly raises these errors - skip when they occur
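This matches the remapping later in this diff, where Azure "Internal server error" responses are raised as `litellm.InternalServerError` instead of a generic 500 `APIError`. A brief hedged sketch of what callers can now do:

```python
import litellm

try:
    response = litellm.completion(
        model="azure/my-deployment",  # illustrative deployment name
        messages=[{"role": "user", "content": "hello"}],
    )
except litellm.InternalServerError:
    # transient server-side failure: reasonable to retry or fail over
    pass
```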

View file

@ -102,18 +102,18 @@ async def test_get_available_deployments_custom_price():
@pytest.mark.asyncio
async def test_lowest_cost_routing():
"""
Test if router returns model with the lowest cost
Test if router, returns model with the lowest cost
"""
model_list = [
{
"model_name": "gpt-3.5-turbo",
"model_name": "gpt-4",
"litellm_params": {"model": "gpt-4"},
"model_info": {"id": "openai-gpt-4"},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {"model": "groq/llama3-8b-8192"},
"model_info": {"id": "groq-llama"},
"litellm_params": {"model": "gpt-3.5-turbo"},
"model_info": {"id": "gpt-3.5-turbo"},
},
]
@ -127,7 +127,7 @@ async def test_lowest_cost_routing():
print(
response._hidden_params["model_id"]
) # expect gpt-3.5-turbo, since it has the lowest cost
assert "groq-llama" == response._hidden_params["model_id"]
assert "gpt-3.5-turbo" == response._hidden_params["model_id"]
async def _deploy(lowest_cost_logger, deployment_id, tokens_used, duration):

View file

@ -38,6 +38,48 @@ def test_router_sensitive_keys():
assert "special-key" not in str(e)
def test_router_order():
"""
Asserts for 2 models in a model group, model with order=1 always called first
"""
router = Router(
model_list=[
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-4o",
"api_key": os.getenv("OPENAI_API_KEY"),
"mock_response": "Hello world",
"order": 1,
},
"model_info": {"id": "1"},
},
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-4o",
"api_key": "bad-key",
"mock_response": Exception("this is a bad key"),
"order": 2,
},
"model_info": {"id": "2"},
},
],
num_retries=0,
allowed_fails=0,
enable_pre_call_checks=True,
)
for _ in range(100):
response = router.completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
assert isinstance(response, litellm.ModelResponse)
assert response._hidden_params["model_id"] == "1"
@pytest.mark.parametrize("num_retries", [None, 2])
@pytest.mark.parametrize("max_retries", [None, 4])
def test_router_num_retries_init(num_retries, max_retries):

View file

@ -186,3 +186,13 @@ def test_load_test_token_counter(model):
total_time = end_time - start_time
print("model={}, total test time={}".format(model, total_time))
assert total_time < 10, f"Total encoding time > 10s, {total_time}"
def test_openai_token_with_image_and_text():
model = "gpt-4o"
full_request = {
    "model": "gpt-4o",
    "tools": [
        {
            "type": "function",
            "function": {
                "name": "json",
                "parameters": {
                    "type": "object",
                    "required": ["clause"],
                    "properties": {"clause": {"type": "string"}},
                },
                "description": "Respond with a JSON object.",
            },
        }
    ],
    "logprobs": False,
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "text": "\n Just some long text, long long text, and you know it will be longer than 7 tokens definetly.",
                    "type": "text",
                }
            ],
        }
    ],
    "tool_choice": {"type": "function", "function": {"name": "json"}},
    "exclude_models": [],
    "disable_fallback": False,
    "exclude_providers": [],
}
messages = full_request.get("messages", [])
token_count = token_counter(model=model, messages=messages)
print(token_count)
test_openai_token_with_image_and_text()
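The counter change this test exercises (see the `openai_token_counter` hunk later in this diff) encodes each `text` part of a multi-part message individually. A minimal sketch of that idea with `tiktoken`, assuming `cl100k_base` as a stand-in for the model's actual encoding:

```python
# Counting tokens per text part of a multi-part message, as the updated
# openai_token_counter does. cl100k_base is an assumed stand-in encoding.
import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")

messages = [
    {
        "role": "user",
        "content": [{"type": "text", "text": "Just some long text, long long text."}],
    }
]

num_tokens = 0
for message in messages:
    content = message.get("content")
    if isinstance(content, list):
        for part in content:
            if part["type"] == "text":
                num_tokens += len(
                    encoding.encode(part["text"], disallowed_special=())
                )

print(num_tokens)
```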

View file

@ -1374,8 +1374,12 @@ class Logging:
callback_func=callback,
)
except Exception as e:
traceback.print_exc()
print_verbose(
verbose_logger.error(
"litellm.Logging.pre_call(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}"
)
print_verbose(
@ -4062,6 +4066,7 @@ def openai_token_counter(
for c in value:
if c["type"] == "text":
text += c["text"]
num_tokens += len(encoding.encode(c["text"], disallowed_special=()))
elif c["type"] == "image_url":
if isinstance(c["image_url"], dict):
image_url_dict = c["image_url"]
@ -6194,6 +6199,27 @@ def calculate_max_parallel_requests(
return None
def _get_order_filtered_deployments(healthy_deployments: List[Dict]) -> List:
min_order = min(
(
deployment["litellm_params"]["order"]
for deployment in healthy_deployments
if "order" in deployment["litellm_params"]
),
default=None,
)
if min_order is not None:
filtered_deployments = [
deployment
for deployment in healthy_deployments
if deployment["litellm_params"].get("order") == min_order
]
return filtered_deployments
return healthy_deployments
def _get_model_region(
custom_llm_provider: str, litellm_params: LiteLLM_Params
) -> Optional[str]:
@ -7336,6 +7362,10 @@ def get_provider_fields(custom_llm_provider: str) -> List[ProviderField]:
if custom_llm_provider == "databricks":
return litellm.DatabricksConfig().get_required_params()
elif custom_llm_provider == "ollama":
return litellm.OllamaConfig().get_required_params()
else:
return []
@ -9782,8 +9812,7 @@ def exception_type(
elif custom_llm_provider == "azure":
if "Internal server error" in error_str:
exception_mapping_worked = True
raise APIError(
status_code=500,
raise litellm.InternalServerError(
message=f"AzureException Internal server error - {original_exception.message}",
llm_provider="azure",
model=model,
@ -10033,6 +10062,8 @@ def get_secret(
):
key_management_system = litellm._key_management_system
key_management_settings = litellm._key_management_settings
args = locals()
if secret_name.startswith("os.environ/"):
secret_name = secret_name.replace("os.environ/", "")
@ -10120,13 +10151,13 @@ def get_secret(
key_manager = "local"
if (
key_manager == KeyManagementSystem.AZURE_KEY_VAULT
key_manager == KeyManagementSystem.AZURE_KEY_VAULT.value
or type(client).__module__ + "." + type(client).__name__
== "azure.keyvault.secrets._client.SecretClient"
): # support Azure Secret Client - from azure.keyvault.secrets import SecretClient
secret = client.get_secret(secret_name).value
elif (
key_manager == KeyManagementSystem.GOOGLE_KMS
key_manager == KeyManagementSystem.GOOGLE_KMS.value
or client.__class__.__name__ == "KeyManagementServiceClient"
):
encrypted_secret: Any = os.getenv(secret_name)
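The `.value` fixes above matter because `key_manager` is read back as a plain string, and a Python `Enum` member never compares equal to its string value. A two-assert illustration:

```python
from enum import Enum


class KeyManagementSystem(Enum):
    AZURE_KEY_VAULT = "azure_key_vault"


key_manager = "azure_key_vault"  # loaded from settings as a plain string
assert key_manager != KeyManagementSystem.AZURE_KEY_VAULT  # Enum member: never equal
assert key_manager == KeyManagementSystem.AZURE_KEY_VAULT.value  # compares as intended
```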
@ -10154,6 +10185,25 @@ def get_secret(
secret = response.plaintext.decode(
"utf-8"
) # assumes the original value was encoded with utf-8
elif key_manager == KeyManagementSystem.AWS_KMS.value:
"""
Only check the tokens which start with 'aws_kms/'. This prevents latency impact caused by checking all keys.
"""
encrypted_value = os.getenv(secret_name, None)
if encrypted_value is None:
raise Exception("encrypted value for AWS KMS cannot be None.")
# Decode the base64 encoded ciphertext
ciphertext_blob = base64.b64decode(encrypted_value)
# Set up the parameters for the decrypt call
params = {"CiphertextBlob": ciphertext_blob}
# Perform the decryption
response = client.decrypt(**params)
# Extract and decode the plaintext
plaintext = response["Plaintext"]
secret = plaintext.decode("utf-8")
elif key_manager == KeyManagementSystem.AWS_SECRET_MANAGER.value:
try:
get_secret_value_response = client.get_secret_value(
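For the new `aws_kms` branch to have something to decrypt, the secret must be KMS-encrypted and base64-encoded into the environment ahead of time. A hedged round-trip sketch with boto3 (key alias, region, and variable names are placeholders):

```python
import base64
import os

import boto3

kms = boto3.client("kms", region_name="us-west-2")  # region is illustrative

# One-time setup, out of band: encrypt the secret and store the base64
# ciphertext in the environment variable the proxy will read.
encrypted = kms.encrypt(KeyId="alias/my-litellm-key", Plaintext=b"sk-secret-value")
os.environ["MY_SECRET"] = base64.b64encode(encrypted["CiphertextBlob"]).decode()

# What the new aws_kms branch does at read time.
ciphertext_blob = base64.b64decode(os.environ["MY_SECRET"])
response = kms.decrypt(CiphertextBlob=ciphertext_blob)
secret = response["Plaintext"].decode("utf-8")
assert secret == "sk-secret-value"
```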
@ -10174,10 +10224,14 @@ def get_secret(
for k, v in secret_dict.items():
secret = v
print_verbose(f"secret: {secret}")
elif key_manager == "local":
secret = os.getenv(secret_name)
else: # assume the default is infisical client
secret = client.get_secret(secret_name).secret_value
except Exception as e: # check if it's in os.environ
print_verbose(f"An exception occurred - {str(e)}")
verbose_logger.error(
f"An exception occurred - {str(e)}\n\n{traceback.format_exc()}"
)
secret = os.getenv(secret_name)
try:
secret_value_as_bool = ast.literal_eval(secret)
@ -10511,7 +10565,12 @@ class CustomStreamWrapper:
"finish_reason": finish_reason,
}
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.CustomStreamWrapper.handle_predibase_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
raise e
def handle_huggingface_chunk(self, chunk):
@ -10555,7 +10614,12 @@ class CustomStreamWrapper:
"finish_reason": finish_reason,
}
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.CustomStreamWrapper.handle_huggingface_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
raise e
def handle_ai21_chunk(self, chunk): # fake streaming
@ -10790,7 +10854,12 @@ class CustomStreamWrapper:
"usage": usage,
}
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.CustomStreamWrapper.handle_openai_chat_completion_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
raise e
def handle_azure_text_completion_chunk(self, chunk):
@ -10871,7 +10940,12 @@ class CustomStreamWrapper:
else:
return ""
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.CustomStreamWrapper.handle_baseten_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
return ""
def handle_cloudlfare_stream(self, chunk):
@ -11070,7 +11144,12 @@ class CustomStreamWrapper:
"is_finished": True,
}
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.CustomStreamWrapper.handle_clarifai_chunk(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
return ""
def model_response_creator(self):
@ -11557,7 +11636,12 @@ class CustomStreamWrapper:
tool["type"] = "function"
model_response.choices[0].delta = Delta(**_json_delta)
except Exception as e:
traceback.print_exc()
verbose_logger.error(
"litellm.CustomStreamWrapper.chunk_creator(): Exception occured - {}".format(
str(e)
)
)
verbose_logger.debug(traceback.format_exc())
model_response.choices[0].delta = Delta()
else:
try:

poetry.lock generated
View file

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]]
name = "aiohttp"
@ -2114,6 +2114,7 @@ files = [
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
{file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
{file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
{file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
{file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
@ -2121,8 +2122,15 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
{file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
{file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
{file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
{file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
@ -2139,6 +2147,7 @@ files = [
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
{file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
{file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
{file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
{file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
@ -2146,6 +2155,7 @@ files = [
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
{file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
{file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
{file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
{file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
@ -3140,4 +3150,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0, !=3.9.7"
content-hash = "a54d969a1a707413e7cd3ce869d14ef73dd41bb9d36ebf0fb878d9e929bc15b3"
content-hash = "6a37992b63b11d254f5f40687bd96898b1d9515728f663f30dcc81c4ef8df7b7"

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.40.3"
version = "1.40.5"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@ -62,7 +62,8 @@ extra_proxy = [
"azure-identity",
"azure-keyvault-secrets",
"google-cloud-kms",
"resend"
"resend",
"pynacl"
]
[tool.poetry.scripts]
@ -79,7 +80,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.40.3"
version = "1.40.5"
version_files = [
"pyproject.toml:^version"
]

View file

@ -243,4 +243,16 @@ model LiteLLM_InvitationLink {
liteLLM_user_table_user LiteLLM_UserTable @relation("UserId", fields: [user_id], references: [user_id])
liteLLM_user_table_created LiteLLM_UserTable @relation("CreatedBy", fields: [created_by], references: [user_id])
liteLLM_user_table_updated LiteLLM_UserTable @relation("UpdatedBy", fields: [updated_by], references: [user_id])
}
model LiteLLM_AuditLog {
id String @id @default(uuid())
updated_at DateTime @default(now())
changed_by String // user or system that performed the action
action String // create, update, delete
table_name String // one of LitellmTableNames.TEAM_TABLE_NAME, LitellmTableNames.USER_TABLE_NAME, LitellmTableNames.PROXY_MODEL_TABLE_NAME
object_id String // id of the object being audited. This can be the key id, team id, user id, model id
before_value Json? // value of the row before the change
updated_values Json? // value of the row after the change
}
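For reference, inserting a row into the new table via prisma-client-py would look roughly like this; the `litellm_auditlog` accessor and `Json` wrapper follow that client's conventions and are assumptions, not code from this diff:

```python
from prisma import Json, Prisma


async def record_team_update(db: Prisma, team_id: str, before: dict, after: dict):
    # Accessor name assumed from prisma-client-py's lowercasing of model names.
    await db.litellm_auditlog.create(
        data={
            "changed_by": "admin-ui",
            "action": "update",
            "table_name": "LiteLLM_TeamTable",
            "object_id": team_id,
            "before_value": Json(before),
            "updated_values": Json(after),
        }
    )
```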

View file

@ -145,6 +145,7 @@ enum Providers {
OpenAI_Compatible = "OpenAI-Compatible Endpoints (Groq, Together AI, Mistral AI, etc.)",
Vertex_AI = "Vertex AI (Anthropic, Gemini, etc.)",
Databricks = "Databricks",
Ollama = "Ollama",
}
const provider_map: Record<string, string> = {
@ -156,6 +157,7 @@ const provider_map: Record<string, string> = {
OpenAI_Compatible: "openai",
Vertex_AI: "vertex_ai",
Databricks: "databricks",
Ollama: "ollama",
};
const retry_policy_map: Record<string, string> = {
@ -1747,6 +1749,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
)}
{selectedProvider != Providers.Bedrock &&
selectedProvider != Providers.Vertex_AI &&
selectedProvider != Providers.Ollama &&
(dynamicProviderForm === undefined ||
dynamicProviderForm.fields.length == 0) && (
<Form.Item