forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_bedrock_converse_api
commit 26993c067e
82 changed files with 2540 additions and 1147 deletions

62  README.md

@ -225,37 +225,37 @@ curl 'http://0.0.0.0:4000/key/generate' \
## Supported Providers ([Docs](https://docs.litellm.ai/docs/providers))

| Provider | [Completion](https://docs.litellm.ai/docs/#basic-usage) | [Streaming](https://docs.litellm.ai/docs/completion/stream#streaming-responses) | [Async Completion](https://docs.litellm.ai/docs/completion/stream#async-completion) | [Async Streaming](https://docs.litellm.ai/docs/completion/stream#async-streaming) | [Async Embedding](https://docs.litellm.ai/docs/embedding/supported_embedding) | [Async Image Generation](https://docs.litellm.ai/docs/image_generation) |
|----------|------------|-----------|------------------|-----------------|-----------------|------------------------|
| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [google - vertex_ai](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ | | |
| [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | ✅ | | |
| [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ | | |
| [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ | | |
| [huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ | | |
| [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | ✅ | | |
| [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | ✅ | | |
| [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ | | |
| [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ | | |
| [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ | | |
| [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ | | |
| [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ | | |
| [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ | | |
| [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ | | |
| [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ | | |
| [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ | | |
| [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ | | |
| [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ | | |
| [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ | |
| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ | |
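
Each column above corresponds to a LiteLLM SDK entry point. A minimal sketch (assuming `OPENAI_API_KEY` is exported; swap the model string for any provider row above):

```python
import asyncio
from litellm import completion, acompletion, embedding

messages = [{"role": "user", "content": "Hey, how's it going?"}]

# Completion
print(completion(model="gpt-3.5-turbo", messages=messages))

# Streaming - iterate over chunks as they arrive
for chunk in completion(model="gpt-3.5-turbo", messages=messages, stream=True):
    print(chunk)

# Async Completion
async def call_async():
    return await acompletion(model="gpt-3.5-turbo", messages=messages)

print(asyncio.run(call_async()))

# Embedding (providers with a check in the embedding column)
print(embedding(model="text-embedding-ada-002", input=["good morning from litellm"]))
```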

[**Read the Docs**](https://docs.litellm.ai/docs/)

@ -10,6 +10,7 @@ For companies that need SSO, user management and professional support for LiteLL
This covers:
- ✅ **Features under the [LiteLLM Commercial License (Content Mod, Custom Tags, etc.)](https://docs.litellm.ai/docs/proxy/enterprise)**
- ✅ [**Secure UI access with Single Sign-On**](../docs/proxy/ui.md#setup-ssoauth-for-ui)
- ✅ [**Audit Logs with retention policy**](../docs/proxy/enterprise.md#audit-logs)
- ✅ [**JWT-Auth**](../docs/proxy/token_auth.md)
- ✅ [**Prompt Injection Detection**](#prompt-injection-detection-lakeraai)
- ✅ [**Invite Team Members to access `/spend` Routes**](../docs/proxy/cost_tracking#allowing-non-proxy-admins-to-access-spend-endpoints)

@ -38,7 +38,7 @@ class MyCustomHandler(CustomLogger):
        print(f"On Async Success")

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        print(f"On Async Failure")

customHandler = MyCustomHandler()
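# Not part of the original snippet - a sketch of registering the handler (assumes
# `import litellm` earlier in the example) so it fires on success/failure events:
litellm.callbacks = [customHandler]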
@ -144,6 +144,26 @@ print(response)
```

You can also pass `metadata` as part of the request header with a `langfuse_*` prefix:

```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
    --header 'Content-Type: application/json' \
    --header 'langfuse_trace_id: trace-id22' \
    --header 'langfuse_trace_user_id: user-id2' \
    --header 'langfuse_trace_metadata: {"key":"value"}' \
    --data '{
    "model": "gpt-3.5-turbo",
    "messages": [
        {
          "role": "user",
          "content": "what llm are you"
        }
    ]
}'
```
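
If you call the proxy through the OpenAI Python SDK instead of curl, the same `langfuse_*` headers can be attached per request via `extra_headers` (a sketch mirroring the curl example above):

```python
import openai

client = openai.OpenAI(api_key="anything", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "what llm are you"}],
    # forwarded by the proxy to Langfuse as trace metadata
    extra_headers={
        "langfuse_trace_id": "trace-id22",
        "langfuse_trace_user_id": "user-id2",
        "langfuse_trace_metadata": '{"key":"value"}',
    },
)
print(response)
```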

### Trace & Generation Parameters

#### Trace Specific Parameters

@ -0,0 +1,3 @@
llmcord.py lets you and your friends chat with LLMs directly in your Discord server. It works with practically any LLM, remote or locally hosted.

GitHub: https://github.com/jakobdylanc/discord-llm-chatbot
@ -46,13 +46,13 @@ for chunk in response:
## Supported Models - ALL Groq Models Supported!
We support ALL Groq models, just set `groq/` as a prefix when sending completion requests

| Model Name         | Function Call                                           |
|--------------------|---------------------------------------------------------|
| llama3-8b-8192     | `completion(model="groq/llama3-8b-8192", messages)`     |
| llama3-70b-8192    | `completion(model="groq/llama3-70b-8192", messages)`    |
| llama2-70b-4096    | `completion(model="groq/llama2-70b-4096", messages)`    |
| mixtral-8x7b-32768 | `completion(model="groq/mixtral-8x7b-32768", messages)` |
| gemma-7b-it        | `completion(model="groq/gemma-7b-it", messages)`        |
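
For reference, a minimal non-streaming sketch of one of the models above (assuming `GROQ_API_KEY` is set in your environment):

```python
import os
from litellm import completion

os.environ["GROQ_API_KEY"] = "your-groq-api-key"  # placeholder - use your real key

response = completion(
    model="groq/llama3-8b-8192",
    messages=[{"role": "user", "content": "hello from litellm"}],
)
print(response.choices[0].message.content)
```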

## Groq - Tool / Function Calling Example

@ -26,52 +26,52 @@ Example TogetherAI Usage - Note: liteLLM supports all models deployed on Togethe

### Llama LLMs - Chat
| Model Name | Function Call | Required OS Variables |
|------------|---------------|-----------------------|
| togethercomputer/llama-2-70b-chat | `completion('together_ai/togethercomputer/llama-2-70b-chat', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
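
A runnable sketch of the chat model above (assuming `TOGETHERAI_API_KEY` is exported):

```python
import os
from litellm import completion

os.environ["TOGETHERAI_API_KEY"] = "your-together-ai-key"  # placeholder - use your real key

response = completion(
    model="together_ai/togethercomputer/llama-2-70b-chat",
    messages=[{"role": "user", "content": "write a one-line haiku about proxies"}],
)
print(response.choices[0].message.content)
```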

### Llama LLMs - Language / Instruct
| Model Name | Function Call | Required OS Variables |
|------------|---------------|-----------------------|
| togethercomputer/llama-2-70b | `completion('together_ai/togethercomputer/llama-2-70b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/LLaMA-2-7B-32K | `completion('together_ai/togethercomputer/LLaMA-2-7B-32K', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/Llama-2-7B-32K-Instruct | `completion('together_ai/togethercomputer/Llama-2-7B-32K-Instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/llama-2-7b | `completion('together_ai/togethercomputer/llama-2-7b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |

### Falcon LLMs
| Model Name | Function Call | Required OS Variables |
|------------|---------------|-----------------------|
| togethercomputer/falcon-40b-instruct | `completion('together_ai/togethercomputer/falcon-40b-instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/falcon-7b-instruct | `completion('together_ai/togethercomputer/falcon-7b-instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |

### Alpaca LLMs
| Model Name | Function Call | Required OS Variables |
|------------|---------------|-----------------------|
| togethercomputer/alpaca-7b | `completion('together_ai/togethercomputer/alpaca-7b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |

### Other Chat LLMs
| Model Name | Function Call | Required OS Variables |
|------------|---------------|-----------------------|
| HuggingFaceH4/starchat-alpha | `completion('together_ai/HuggingFaceH4/starchat-alpha', messages)` | `os.environ['TOGETHERAI_API_KEY']` |

### Code LLMs
| Model Name | Function Call | Required OS Variables |
|------------|---------------|-----------------------|
| togethercomputer/CodeLlama-34b | `completion('together_ai/togethercomputer/CodeLlama-34b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/CodeLlama-34b-Instruct | `completion('together_ai/togethercomputer/CodeLlama-34b-Instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/CodeLlama-34b-Python | `completion('together_ai/togethercomputer/CodeLlama-34b-Python', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| defog/sqlcoder | `completion('together_ai/defog/sqlcoder', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| NumbersStation/nsql-llama-2-7B | `completion('together_ai/NumbersStation/nsql-llama-2-7B', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| WizardLM/WizardCoder-15B-V1.0 | `completion('together_ai/WizardLM/WizardCoder-15B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| WizardLM/WizardCoder-Python-34B-V1.0 | `completion('together_ai/WizardLM/WizardCoder-Python-34B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |

### Language LLMs
| Model Name | Function Call | Required OS Variables |
|------------|---------------|-----------------------|
| NousResearch/Nous-Hermes-Llama2-13b | `completion('together_ai/NousResearch/Nous-Hermes-Llama2-13b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Austism/chronos-hermes-13b | `completion('together_ai/Austism/chronos-hermes-13b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| upstage/SOLAR-0-70b-16bit | `completion('together_ai/upstage/SOLAR-0-70b-16bit', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| WizardLM/WizardLM-70B-V1.0 | `completion('together_ai/WizardLM/WizardLM-70B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |

## Prompt Templates

@ -155,14 +155,14 @@ def default_pt(messages):

#### Models we already have Prompt Templates for

| Model Name | Works for Models | Function Call |
|------------|------------------|---------------|
| meta-llama/Llama-2-7b-chat | All meta-llama llama2 chat models | `completion(model='vllm/meta-llama/Llama-2-7b', messages=messages, api_base="your_api_endpoint")` |
| tiiuae/falcon-7b-instruct | All falcon instruct models | `completion(model='vllm/tiiuae/falcon-7b-instruct', messages=messages, api_base="your_api_endpoint")` |
| mosaicml/mpt-7b-chat | All mpt chat models | `completion(model='vllm/mosaicml/mpt-7b-chat', messages=messages, api_base="your_api_endpoint")` |
| codellama/CodeLlama-34b-Instruct-hf | All codellama instruct models | `completion(model='vllm/codellama/CodeLlama-34b-Instruct-hf', messages=messages, api_base="your_api_endpoint")` |
| WizardLM/WizardCoder-Python-34B-V1.0 | All wizardcoder models | `completion(model='vllm/WizardLM/WizardCoder-Python-34B-V1.0', messages=messages, api_base="your_api_endpoint")` |
| Phind/Phind-CodeLlama-34B-v2 | All phind-codellama models | `completion(model='vllm/Phind/Phind-CodeLlama-34B-v2', messages=messages, api_base="your_api_endpoint")` |

#### Custom prompt templates

@ -251,23 +251,23 @@ response = completion(
Here are some examples of models available in IBM watsonx.ai that you can use with LiteLLM:

| Model Name | Command |
|------------|---------|
| Flan T5 XXL | `completion(model=watsonx/google/flan-t5-xxl, messages=messages)` |
| Flan Ul2 | `completion(model=watsonx/google/flan-ul2, messages=messages)` |
| Mt0 XXL | `completion(model=watsonx/bigscience/mt0-xxl, messages=messages)` |
| Gpt Neox | `completion(model=watsonx/eleutherai/gpt-neox-20b, messages=messages)` |
| Mpt 7B Instruct2 | `completion(model=watsonx/ibm/mpt-7b-instruct2, messages=messages)` |
| Starcoder | `completion(model=watsonx/bigcode/starcoder, messages=messages)` |
| Llama 2 70B Chat | `completion(model=watsonx/meta-llama/llama-2-70b-chat, messages=messages)` |
| Llama 2 13B Chat | `completion(model=watsonx/meta-llama/llama-2-13b-chat, messages=messages)` |
| Granite 13B Instruct | `completion(model=watsonx/ibm/granite-13b-instruct-v1, messages=messages)` |
| Granite 13B Chat | `completion(model=watsonx/ibm/granite-13b-chat-v1, messages=messages)` |
| Flan T5 XL | `completion(model=watsonx/google/flan-t5-xl, messages=messages)` |
| Granite 13B Chat V2 | `completion(model=watsonx/ibm/granite-13b-chat-v2, messages=messages)` |
| Granite 13B Instruct V2 | `completion(model=watsonx/ibm/granite-13b-instruct-v2, messages=messages)` |
| Elyza Japanese Llama 2 7B Instruct | `completion(model=watsonx/elyza/elyza-japanese-llama-2-7b-instruct, messages=messages)` |
| Mixtral 8X7B Instruct V01 Q | `completion(model=watsonx/ibm-mistralai/mixtral-8x7b-instruct-v01-q, messages=messages)` |

For a list of all available models in watsonx.ai, see [here](https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/fm-models.html?context=wx&locale=en&audience=wdp).
@ -275,10 +275,10 @@ For a list of all available models in watsonx.ai, see [here](https://dataplatfor

## Supported IBM watsonx.ai Embedding Models

| Model Name | Function Call |
|------------|---------------|
| Slate 30m | `embedding(model="watsonx/ibm/slate-30m-english-rtrvr", input=input)` |
| Slate 125m | `embedding(model="watsonx/ibm/slate-125m-english-rtrvr", input=input)` |

For a list of all available embedding models in watsonx.ai, see [here](https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/fm-models-embed.html?context=wx).
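
As a quick sketch (with your watsonx.ai credentials already configured as shown earlier on this page), an embedding call looks like:

```python
from litellm import embedding

# assumes the WATSONX_* environment variables from the setup section are set
response = embedding(
    model="watsonx/ibm/slate-30m-english-rtrvr",
    input=["What is the capital of France?"],
)
print(len(response.data[0]["embedding"]))  # embedding dimension
```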
@ -37,26 +37,26 @@ print(response)
## Supported Models
All models listed here https://inference.readthedocs.io/en/latest/models/builtin/embedding/index.html are supported

| Model Name | Function Call |
|------------|---------------|
| bge-base-en | `embedding(model="xinference/bge-base-en", input)` |
| bge-base-en-v1.5 | `embedding(model="xinference/bge-base-en-v1.5", input)` |
| bge-base-zh | `embedding(model="xinference/bge-base-zh", input)` |
| bge-base-zh-v1.5 | `embedding(model="xinference/bge-base-zh-v1.5", input)` |
| bge-large-en | `embedding(model="xinference/bge-large-en", input)` |
| bge-large-en-v1.5 | `embedding(model="xinference/bge-large-en-v1.5", input)` |
| bge-large-zh | `embedding(model="xinference/bge-large-zh", input)` |
| bge-large-zh-noinstruct | `embedding(model="xinference/bge-large-zh-noinstruct", input)` |
| bge-large-zh-v1.5 | `embedding(model="xinference/bge-large-zh-v1.5", input)` |
| bge-small-en-v1.5 | `embedding(model="xinference/bge-small-en-v1.5", input)` |
| bge-small-zh | `embedding(model="xinference/bge-small-zh", input)` |
| bge-small-zh-v1.5 | `embedding(model="xinference/bge-small-zh-v1.5", input)` |
| e5-large-v2 | `embedding(model="xinference/e5-large-v2", input)` |
| gte-base | `embedding(model="xinference/gte-base", input)` |
| gte-large | `embedding(model="xinference/gte-large", input)` |
| jina-embeddings-v2-base-en | `embedding(model="xinference/jina-embeddings-v2-base-en", input)` |
| jina-embeddings-v2-small-en | `embedding(model="xinference/jina-embeddings-v2-small-en", input)` |
| multilingual-e5-large | `embedding(model="xinference/multilingual-e5-large", input)` |

@ -260,7 +260,7 @@ Requirements:
<TabItem value="docker-deploy" label="Dockerfile">

We maintain a [separate Dockerfile](https://github.com/BerriAI/litellm/pkgs/container/litellm-database) for reducing build time when running LiteLLM proxy with a connected Postgres Database

```shell
docker pull ghcr.io/berriai/litellm-database:main-latest
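# Illustrative follow-up (not part of the original snippet): run the pulled image
# against your Postgres instance - DATABASE_URL below is a placeholder to replace.
docker run --name litellm-proxy \
  -e DATABASE_URL=postgresql://<user>:<password>@<host>:5432/<dbname> \
  -p 4000:4000 \
  ghcr.io/berriai/litellm-database:main-latest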
@ -2,30 +2,213 @@ import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# ✨ Enterprise Features - SSO, Audit Logs, Guardrails

:::tip

Get in touch with us [here](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)

:::

Features:
- ✅ [SSO for Admin UI](./ui.md#✨-enterprise-features)
- ✅ [Audit Logs](#audit-logs)
- ✅ [Tracking Spend for Custom Tags](#tracking-spend-for-custom-tags)
- ✅ [Content Moderation with LLM Guard, LlamaGuard, Google Text Moderations](#content-moderation)
- ✅ [Prompt Injection Detection (with LakeraAI API)](#prompt-injection-detection---lakeraai)
- ✅ [Custom Branding + Routes on Swagger Docs](#swagger-docs---custom-routes--branding)
- ✅ Reject calls from Blocked User list
- ✅ Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors)

## Audit Logs

Store audit logs for **Create, Update, Delete operations** done on `Teams` and `Virtual Keys`

**Step 1** Switch on audit logs

```yaml
litellm_settings:
  store_audit_logs: true
```

Start the litellm proxy with this config

**Step 2** Test it - Create a Team

```shell
curl --location 'http://0.0.0.0:4000/team/new' \
    --header 'Authorization: Bearer sk-1234' \
    --header 'Content-Type: application/json' \
    --data '{
        "max_budget": 2
    }'
```

**Step 3** Expected Log

```json
{
  "id": "e1760e10-4264-4499-82cd-c08c86c8d05b",
  "updated_at": "2024-06-06T02:10:40.836420+00:00",
  "changed_by": "109010464461339474872",
  "action": "created",
  "table_name": "LiteLLM_TeamTable",
  "object_id": "82e725b5-053f-459d-9a52-867191635446",
  "before_value": null,
  "updated_values": {
    "team_id": "82e725b5-053f-459d-9a52-867191635446",
    "admins": [],
    "members": [],
    "members_with_roles": [
      {
        "role": "admin",
        "user_id": "109010464461339474872"
      }
    ],
    "max_budget": 2.0,
    "models": [],
    "blocked": false
  }
}
```

## Tracking Spend for Custom Tags

Requirements:

- Virtual Keys & a database should be set up, see [virtual keys](https://docs.litellm.ai/docs/proxy/virtual_keys)

#### Usage - /chat/completions requests with request tags

<Tabs>

<TabItem value="openai" label="OpenAI Python v1.0.0+">

Set `extra_body={"metadata": { }}` to the `metadata` you want to pass

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages = [
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={
        "metadata": {
            "tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"]
        }
    }
)

print(response)
```
</TabItem>

<TabItem value="Curl" label="Curl Request">

Pass `metadata` as part of the request body

```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
    --header 'Content-Type: application/json' \
    --data '{
    "model": "gpt-3.5-turbo",
    "messages": [
        {
            "role": "user",
            "content": "what llm are you"
        }
    ],
    "metadata": {"tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"]}
}'
```
</TabItem>

<TabItem value="langchain" label="Langchain">

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage

chat = ChatOpenAI(
    openai_api_base="http://0.0.0.0:4000",
    model = "gpt-3.5-turbo",
    temperature=0.1,
    extra_body={
        "metadata": {
            "tags": ["model-anthropic-claude-v2.1", "app-ishaan-prod"]
        }
    }
)

messages = [
    SystemMessage(
        content="You are a helpful assistant that im using to make a test request to."
    ),
    HumanMessage(
        content="test from litellm. tell me why it's amazing in 1 sentence"
    ),
]
response = chat(messages)

print(response)
```

</TabItem>
</Tabs>

#### Viewing Spend per tag

#### `/spend/tags` Request Format
```shell
curl -X GET "http://0.0.0.0:4000/spend/tags" \
-H "Authorization: Bearer sk-1234"
```

#### `/spend/tags` Response Format
```json
[
  {
    "individual_request_tag": "model-anthropic-claude-v2.1",
    "log_count": 6,
    "total_spend": 0.000672
  },
  {
    "individual_request_tag": "app-ishaan-local",
    "log_count": 4,
    "total_spend": 0.000448
  },
  {
    "individual_request_tag": "app-ishaan-prod",
    "log_count": 2,
    "total_spend": 0.000224
  }
]
```

## Content Moderation
#### Content Moderation with LLM Guard

Set the LLM Guard API Base in your environment

@ -160,7 +343,7 @@ curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
</TabItem>
</Tabs>

#### Content Moderation with LlamaGuard

Currently works with Sagemaker's LlamaGuard endpoint.

@ -194,7 +377,7 @@ callbacks: ["llamaguard_moderations"]

#### Content Moderation with Google Text Moderation

Requires your GOOGLE_APPLICATION_CREDENTIALS to be set in your .env (same as VertexAI).

@ -250,7 +433,7 @@ Here are the category specific values:

#### Content Moderation with OpenAI Moderations

Use this if you want to reject /chat, /completions, /embeddings calls that fail OpenAI Moderations checks

@ -276,7 +459,7 @@ Step 1 Set a `LAKERA_API_KEY` in your env
LAKERA_API_KEY="7a91a1a6059da*******"
```

Step 2. Add `lakera_prompt_injection` to your callbacks

```yaml
litellm_settings:
@ -302,6 +485,42 @@ curl --location 'http://localhost:4000/chat/completions' \
}'
```

## Swagger Docs - Custom Routes + Branding

:::info

Requires a LiteLLM Enterprise key to use. Get a free 2-week license [here](https://forms.gle/sTDVprBs18M4V8Le8)

:::

Set LiteLLM Key in your environment

```bash
LITELLM_LICENSE=""
```

#### Customize Title + Description

In your environment, set:

```bash
DOCS_TITLE="TotalGPT"
DOCS_DESCRIPTION="Sample Company Description"
```

#### Customize Routes

Hide admin routes from users.

In your environment, set:

```bash
DOCS_FILTERED="True" # only shows openai routes to user
```

<Image img={require('../../img/custom_swagger.png')} style={{ width: '900px', height: 'auto' }} />

## Enable Blocked User Lists
If any call is made to proxy with this user id, it'll be rejected - use this if you want to let users opt-out of ai features
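
A configuration sketch for this feature (the callback and setting names below are assumptions based on the feature name; see the rest of this section for the exact request flow):

```yaml
litellm_settings:
  callbacks: ["blocked_user_check"]
  blocked_user_list: ["user_id_1", "user_id_2"]  # assumed setting - can also point at a file of ids
```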
@ -417,176 +636,6 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
}
'
```

## Public Model Hub

@ -41,7 +41,9 @@ litellm_settings:
**Step 3**: Set required env variables for logging to langfuse
```shell
export LANGFUSE_PUBLIC_KEY="pk_kk"
export LANGFUSE_SECRET_KEY="sk_ss"
# Optional, defaults to https://cloud.langfuse.com
export LANGFUSE_HOST="https://xxx.langfuse.com"
```

**Step 4**: Start the proxy, make a test request

@ -101,3 +101,75 @@ print(response)
</TabItem>
</Tabs>

## Advanced - Redis Caching

Use redis caching to do request prioritization across multiple instances of LiteLLM.

### SDK
```python
import os
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "mock_response": "Hello world this is Macintosh!",  # fakes the LLM API call
                "rpm": 1,
            },
        },
    ],
    ### REDIS PARAMS ###
    redis_host=os.environ["REDIS_HOST"],
    redis_password=os.environ["REDIS_PASSWORD"],
    redis_port=os.environ["REDIS_PORT"],
)

# run inside an async function / event loop
try:
    _response = await router.schedule_acompletion(  # 👈 ADDS TO QUEUE + POLLS + MAKES CALL
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hey!"}],
        priority=0,  # 👈 LOWER IS BETTER
    )
except Exception as e:
    print("didn't make request")
```

### PROXY

```yaml
model_list:
  - model_name: gpt-3.5-turbo-fake-model
    litellm_params:
      model: gpt-3.5-turbo
      mock_response: "hello world!"
      api_key: my-good-key

router_settings:
  redis_host: os.environ/REDIS_HOST
  redis_password: os.environ/REDIS_PASSWORD
  redis_port: os.environ/REDIS_PORT
```

```bash
$ litellm --config /path/to/config.yaml

# RUNNING on http://0.0.0.0:4000
```

```bash
curl -X POST 'http://localhost:4000/queue/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
    "model": "gpt-3.5-turbo-fake-model",
    "messages": [
        {
        "role": "user",
        "content": "what is the meaning of the universe? 1234"
        }],
    "priority": 0 👈 SET VALUE HERE
}'
```
@ -1,11 +1,31 @@
# Secret Manager
LiteLLM supports reading secrets from Azure Key Vault and Infisical

- AWS Key Management Service
- AWS Secret Manager
- [Azure Key Vault](#azure-key-vault)
- Google Key Management Service
- [Infisical Secret Manager](#infisical-secret-manager)
- [.env Files](#env-files)

## AWS Key Management Service

Use AWS KMS to store a hashed copy of your Proxy Master Key in the environment.

```bash
export LITELLM_MASTER_KEY="djZ9xjVaZ..." # 👈 ENCRYPTED KEY
export AWS_REGION_NAME="us-west-2"
```

```yaml
general_settings:
  key_management_system: "aws_kms"
  key_management_settings:
    hosted_keys: ["LITELLM_MASTER_KEY"] # 👈 WHICH KEYS ARE STORED ON KMS
```

[**See Decryption Code**](https://github.com/BerriAI/litellm/blob/a2da2a8f168d45648b61279d4795d647d94f90c9/litellm/utils.py#L10182)

## AWS Secret Manager

Store your proxy keys in AWS Secret Manager.
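
As a rough sketch of what enabling it looks like (the `key_management_system` value below is an assumption mirroring the KMS config above; the full steps follow in the docs):

```yaml
general_settings:
  key_management_system: "aws_secret_manager"  # assumption - verify against the settings documented below
```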
@ -1,8 +1,8 @@
# Using Fine-Tuned gpt-3.5-turbo
LiteLLM allows you to call `completion` with your fine-tuned gpt-3.5-turbo models
If you're trying to create your custom fine-tuned gpt-3.5-turbo model, follow along on this tutorial: https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset

Once you've created your fine-tuned model, you can call it with `litellm.completion()`

## Usage
```python
6  docs/my-website/package-lock.json (generated)

@ -5975,9 +5975,9 @@
      }
    },
    "node_modules/caniuse-lite": {
      "version": "1.0.30001629",
      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001629.tgz",
      "integrity": "sha512-c3dl911slnQhmxUIT4HhYzT7wnBK/XYpGnYLOj4nJBaRiw52Ibe7YxlDaAeRECvA786zCuExhxIUJ2K7nHMrBw==",
      "funding": [
        {
          "type": "opencollective",
@@ -36,6 +36,7 @@ const sidebars = {
        label: "📖 All Endpoints (Swagger)",
        href: "https://litellm-api.up.railway.app/",
      },
+     "proxy/enterprise",
      "proxy/demo",
      "proxy/configs",
      "proxy/reliability",
@@ -45,7 +46,6 @@ const sidebars = {
      "proxy/customers",
      "proxy/billing",
      "proxy/user_keys",
-     "proxy/enterprise",
      "proxy/virtual_keys",
      "proxy/alerting",
      {
File diff suppressed because it is too large.
@@ -18,10 +18,6 @@ async def log_event(request: Request):
        return {"message": "Request received successfully"}
    except Exception as e:
-       print(f"Error processing request: {str(e)}")
-       import traceback
-
-       traceback.print_exc()
        raise HTTPException(status_code=500, detail="Internal Server Error")
@@ -120,6 +120,5 @@ class GenericAPILogger:
            )
            return response
        except Exception as e:
-           traceback.print_exc()
-           verbose_logger.debug(f"Generic - {str(e)}\n{traceback.format_exc()}")
+           verbose_logger.error(f"Generic - {str(e)}\n{traceback.format_exc()}")
            pass
@@ -82,7 +82,7 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
        except HTTPException as e:
            raise e
        except Exception as e:
-           traceback.print_exc()
+           verbose_proxy_logger.error(traceback.format_exc())

    async def async_post_call_success_hook(
        self,
@@ -118,4 +118,4 @@ class _ENTERPRISE_BlockedUserList(CustomLogger):
        except HTTPException as e:
            raise e
        except Exception as e:
-           traceback.print_exc()
+           verbose_proxy_logger.error(traceback.format_exc())
@@ -92,7 +92,7 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
                },
            )
        except Exception as e:
-           traceback.print_exc()
+           verbose_proxy_logger.error(traceback.format_exc())
            raise e

    def should_proceed(self, user_api_key_dict: UserAPIKeyAuth, data: dict) -> bool:
@@ -60,6 +60,7 @@ _async_failure_callback: List[Callable] = (
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
turn_off_message_logging: Optional[bool] = False
+store_audit_logs = False  # Enterprise feature, allow users to see audit logs
## end of callbacks #############

email: Optional[str] = (
@@ -808,6 +809,7 @@ from .exceptions import (
    APIConnectionError,
    APIResponseValidationError,
    UnprocessableEntityError,
+   InternalServerError,
    LITELLM_EXCEPTION_TYPES,
)
from .budget_manager import BudgetManager
@@ -1,5 +1,6 @@
import logging, os, json
from logging import Formatter
+import traceback

set_verbose = False
json_logs = bool(os.getenv("JSON_LOGS", False))
@@ -253,7 +253,6 @@ class RedisCache(BaseCache):
                str(e),
                value,
            )
-           traceback.print_exc()
            raise e

    async def async_scan_iter(self, pattern: str, count: int = 100) -> list:
@@ -313,7 +312,6 @@ class RedisCache(BaseCache):
                str(e),
                value,
            )
-           traceback.print_exc()

        key = self.check_and_fix_namespace(key=key)
        async with _redis_client as redis_client:
@@ -352,7 +350,6 @@ class RedisCache(BaseCache):
                str(e),
                value,
            )
-           traceback.print_exc()

    async def async_set_cache_pipeline(self, cache_list, ttl=None):
        """
@@ -413,7 +410,6 @@ class RedisCache(BaseCache):
                str(e),
                cache_value,
            )
-           traceback.print_exc()

    async def batch_cache_write(self, key, value, **kwargs):
        print_verbose(
@@ -458,7 +454,6 @@ class RedisCache(BaseCache):
                str(e),
                value,
            )
-           traceback.print_exc()
            raise e

    async def flush_cache_buffer(self):
@@ -495,8 +490,9 @@ class RedisCache(BaseCache):
            return self._get_cache_logic(cached_response=cached_response)
        except Exception as e:
            # NON blocking - notify users Redis is throwing an exception
-           traceback.print_exc()
-           logging.debug("LiteLLM Caching: get() - Got exception from REDIS: ", e)
+           verbose_logger.error(
+               "LiteLLM Caching: get() - Got exception from REDIS: ", e
+           )

    def batch_get_cache(self, key_list) -> dict:
        """
@@ -646,10 +642,9 @@ class RedisCache(BaseCache):
                error=e,
                call_type="sync_ping",
            )
-           print_verbose(
+           verbose_logger.error(
                f"LiteLLM Redis Cache PING: - Got exception from REDIS : {str(e)}"
            )
-           traceback.print_exc()
            raise e

    async def ping(self) -> bool:
@@ -683,10 +678,9 @@ class RedisCache(BaseCache):
                    call_type="async_ping",
                )
            )
-           print_verbose(
+           verbose_logger.error(
                f"LiteLLM Redis Cache PING: - Got exception from REDIS : {str(e)}"
            )
-           traceback.print_exc()
            raise e

    async def delete_cache_keys(self, keys):
@@ -1138,22 +1132,23 @@ class S3Cache(BaseCache):
                cached_response = ast.literal_eval(cached_response)
            if type(cached_response) is not dict:
                cached_response = dict(cached_response)
-           print_verbose(
+           verbose_logger.debug(
                f"Got S3 Cache: key: {key}, cached_response {cached_response}. Type Response {type(cached_response)}"
            )

            return cached_response
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "NoSuchKey":
-               print_verbose(
+               verbose_logger.error(
                    f"S3 Cache: The specified key '{key}' does not exist in the S3 bucket."
                )
                return None

        except Exception as e:
            # NON blocking - notify users S3 is throwing an exception
-           traceback.print_exc()
-           print_verbose(f"S3 Caching: get_cache() - Got exception from S3: {e}")
+           verbose_logger.error(
+               f"S3 Caching: get_cache() - Got exception from S3: {e}"
+           )

    async def async_get_cache(self, key, **kwargs):
        return self.get_cache(key=key, **kwargs)
@@ -1234,8 +1229,7 @@ class DualCache(BaseCache):

            return result
        except Exception as e:
-           print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
-           traceback.print_exc()
+           verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
            raise e

    def get_cache(self, key, local_only: bool = False, **kwargs):
@@ -1262,7 +1256,7 @@ class DualCache(BaseCache):
            print_verbose(f"get cache: cache result: {result}")
            return result
        except Exception as e:
-           traceback.print_exc()
+           verbose_logger.error(traceback.format_exc())

    def batch_get_cache(self, keys: list, local_only: bool = False, **kwargs):
        try:
@@ -1295,7 +1289,7 @@ class DualCache(BaseCache):
            print_verbose(f"async batch get cache: cache result: {result}")
            return result
        except Exception as e:
-           traceback.print_exc()
+           verbose_logger.error(traceback.format_exc())

    async def async_get_cache(self, key, local_only: bool = False, **kwargs):
        # Try to fetch from in-memory cache first
@@ -1328,7 +1322,7 @@ class DualCache(BaseCache):
            print_verbose(f"get cache: cache result: {result}")
            return result
        except Exception as e:
-           traceback.print_exc()
+           verbose_logger.error(traceback.format_exc())

    async def async_batch_get_cache(
        self, keys: list, local_only: bool = False, **kwargs
@@ -1368,7 +1362,7 @@ class DualCache(BaseCache):

            return result
        except Exception as e:
-           traceback.print_exc()
+           verbose_logger.error(traceback.format_exc())

    async def async_set_cache(self, key, value, local_only: bool = False, **kwargs):
        print_verbose(
@@ -1381,8 +1375,8 @@ class DualCache(BaseCache):
            if self.redis_cache is not None and local_only == False:
                await self.redis_cache.async_set_cache(key, value, **kwargs)
        except Exception as e:
-           print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
-           traceback.print_exc()
+           verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
+           verbose_logger.debug(traceback.format_exc())

    async def async_batch_set_cache(
        self, cache_list: list, local_only: bool = False, **kwargs
@@ -1404,8 +1398,8 @@ class DualCache(BaseCache):
                cache_list=cache_list, ttl=kwargs.get("ttl", None)
            )
        except Exception as e:
-           print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
-           traceback.print_exc()
+           verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
+           verbose_logger.debug(traceback.format_exc())

    async def async_increment_cache(
        self, key, value: float, local_only: bool = False, **kwargs
@@ -1429,8 +1423,8 @@ class DualCache(BaseCache):

            return result
        except Exception as e:
-           print_verbose(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
-           traceback.print_exc()
+           verbose_logger.error(f"LiteLLM Cache: Excepton async add_cache: {str(e)}")
+           verbose_logger.debug(traceback.format_exc())
            raise e

    def flush_cache(self):
@@ -1846,8 +1840,8 @@ class Cache:
            )
            self.cache.set_cache(cache_key, cached_data, **kwargs)
        except Exception as e:
-           print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
-           traceback.print_exc()
+           verbose_logger.error(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
+           verbose_logger.debug(traceback.format_exc())
            pass

    async def async_add_cache(self, result, *args, **kwargs):
@@ -1864,8 +1858,8 @@ class Cache:
            )
            await self.cache.async_set_cache(cache_key, cached_data, **kwargs)
        except Exception as e:
-           print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
-           traceback.print_exc()
+           verbose_logger.error(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
+           verbose_logger.debug(traceback.format_exc())

    async def async_add_cache_pipeline(self, result, *args, **kwargs):
        """
@@ -1897,8 +1891,8 @@ class Cache:
            )
            await asyncio.gather(*tasks)
        except Exception as e:
-           print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
-           traceback.print_exc()
+           verbose_logger.error(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
+           verbose_logger.debug(traceback.format_exc())

    async def batch_cache_write(self, result, *args, **kwargs):
        cache_key, cached_data, kwargs = self._add_cache_logic(
@@ -638,6 +638,7 @@ LITELLM_EXCEPTION_TYPES = [
    APIConnectionError,
    APIResponseValidationError,
    OpenAIError,
+   InternalServerError,
]
@@ -169,6 +169,5 @@ class AISpendLogger:

            print_verbose(f"AISpend Logging - final data object: {data}")
        except:
-           # traceback.print_exc()
            print_verbose(f"AISpend Logging Error - {traceback.format_exc()}")
            pass
@@ -178,6 +178,5 @@ class BerriSpendLogger:
            print_verbose(f"BerriSpend Logging - final data object: {data}")
            response = requests.post(url, headers=headers, json=data)
        except:
-           # traceback.print_exc()
            print_verbose(f"BerriSpend Logging Error - {traceback.format_exc()}")
            pass
@@ -297,6 +297,5 @@ class ClickhouseLogger:
            # make request to endpoint with payload
            verbose_logger.debug(f"Clickhouse Logger - final response = {response}")
        except Exception as e:
-           traceback.print_exc()
            verbose_logger.debug(f"Clickhouse - {str(e)}\n{traceback.format_exc()}")
            pass
@@ -115,7 +115,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
            )
            print_verbose(f"Custom Logger - model call details: {kwargs}")
        except:
-           traceback.print_exc()
            print_verbose(f"Custom Logger Error - {traceback.format_exc()}")

    async def async_log_input_event(
@@ -130,7 +129,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
            )
            print_verbose(f"Custom Logger - model call details: {kwargs}")
        except:
-           traceback.print_exc()
            print_verbose(f"Custom Logger Error - {traceback.format_exc()}")

    def log_event(
@@ -146,7 +144,6 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
                end_time,
            )
        except:
-           # traceback.print_exc()
            print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
            pass

@@ -163,6 +160,5 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
                end_time,
            )
        except:
-           # traceback.print_exc()
            print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
            pass
@@ -134,7 +134,6 @@ class DataDogLogger:
                f"Datadog Layer Logging - final response object: {response_obj}"
            )
        except Exception as e:
-           traceback.print_exc()
            verbose_logger.debug(
                f"Datadog Layer Error - {str(e)}\n{traceback.format_exc()}"
            )
@@ -85,6 +85,5 @@ class DyanmoDBLogger:
            )
            return response
        except:
-           traceback.print_exc()
            print_verbose(f"DynamoDB Layer Error - {traceback.format_exc()}")
            pass
@@ -112,6 +112,5 @@ class HeliconeLogger:
                )
                print_verbose(f"Helicone Logging - Error {response.text}")
        except:
-           # traceback.print_exc()
            print_verbose(f"Helicone Logging Error - {traceback.format_exc()}")
            pass
@@ -69,6 +69,43 @@ class LangFuseLogger:
        else:
            self.upstream_langfuse = None

+   @staticmethod
+   def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict:
+       """
+       Adds metadata from proxy request headers to Langfuse logging if keys start with "langfuse_"
+       and overwrites litellm_params.metadata if already included.
+
+       For example if you want to append your trace to an existing `trace_id` via header, send
+       `headers: { ..., langfuse_existing_trace_id: your-existing-trace-id }` via proxy request.
+       """
+       if litellm_params is None:
+           return metadata
+
+       if litellm_params.get("proxy_server_request") is None:
+           return metadata
+
+       if metadata is None:
+           metadata = {}
+
+       proxy_headers = (
+           litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
+       )
+
+       for metadata_param_key in proxy_headers:
+           if metadata_param_key.startswith("langfuse_"):
+               trace_param_key = metadata_param_key.replace("langfuse_", "", 1)
+               if trace_param_key in metadata:
+                   verbose_logger.warning(
+                       f"Overwriting Langfuse `{trace_param_key}` from request header"
+                   )
+               else:
+                   verbose_logger.debug(
+                       f"Found Langfuse `{trace_param_key}` in request header"
+                   )
+               metadata[trace_param_key] = proxy_headers.get(metadata_param_key)
+
+       return metadata
+
    # def log_error(kwargs, response_obj, start_time, end_time):
    #     generation = trace.generation(
    #         level ="ERROR" # can be any of DEBUG, DEFAULT, WARNING or ERROR
@@ -97,6 +134,7 @@ class LangFuseLogger:
        metadata = (
            litellm_params.get("metadata", {}) or {}
        )  # if litellm_params['metadata'] == None
+       metadata = self.add_metadata_from_header(litellm_params, metadata)
        optional_params = copy.deepcopy(kwargs.get("optional_params", {}))

        prompt = {"messages": kwargs.get("messages")}
@@ -182,9 +220,11 @@ class LangFuseLogger:
            verbose_logger.info(f"Langfuse Layer Logging - logging success")

            return {"trace_id": trace_id, "generation_id": generation_id}
-       except:
-           traceback.print_exc()
-           verbose_logger.debug(f"Langfuse Layer Error - {traceback.format_exc()}")
+       except Exception as e:
+           verbose_logger.error(
+               "Langfuse Layer Error(): Exception occured - {}".format(str(e))
+           )
+           verbose_logger.debug(traceback.format_exc())
            return {"trace_id": None, "generation_id": None}

    async def _async_log_event(
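The `add_metadata_from_header` helper added above reads any `langfuse_*` header off the proxy request and forwards it as Langfuse metadata. As a rough usage sketch (the endpoint, key, and trace id below are placeholders, not values from this diff), a client could attach an existing Langfuse trace like this:

```python
# Hedged sketch: send a chat completion through the LiteLLM proxy and attach it to an
# existing Langfuse trace via the `langfuse_existing_trace_id` header described above.
import requests

resp = requests.post(
    "http://localhost:4000/chat/completions",      # assumed local proxy endpoint
    headers={
        "Authorization": "Bearer sk-1234",          # proxy key (placeholder)
        "langfuse_existing_trace_id": "my-existing-trace-id",
    },
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "hi"}],
    },
)
print(resp.json())
```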
@@ -44,7 +44,9 @@ class LangsmithLogger:
        print_verbose(
            f"Langsmith Logging - project_name: {project_name}, run_name {run_name}"
        )
-       langsmith_base_url = os.getenv("LANGSMITH_BASE_URL", "https://api.smith.langchain.com")
+       langsmith_base_url = os.getenv(
+           "LANGSMITH_BASE_URL", "https://api.smith.langchain.com"
+       )

        try:
            print_verbose(
@@ -89,9 +91,7 @@ class LangsmithLogger:
            }

            url = f"{langsmith_base_url}/runs"
-           print_verbose(
-               f"Langsmith Logging - About to send data to {url} ..."
-           )
+           print_verbose(f"Langsmith Logging - About to send data to {url} ...")
            response = requests.post(
                url=url,
                json=data,
@@ -106,6 +106,5 @@ class LangsmithLogger:
                f"Langsmith Layer Logging - final response object: {response_obj}"
            )
        except:
-           # traceback.print_exc()
            print_verbose(f"Langsmith Layer Error - {traceback.format_exc()}")
            pass
@@ -171,7 +171,6 @@ class LogfireLogger:
                f"Logfire Layer Logging - final response object: {response_obj}"
            )
        except Exception as e:
-           traceback.print_exc()
            verbose_logger.debug(
                f"Logfire Layer Error - {str(e)}\n{traceback.format_exc()}"
            )
@@ -14,6 +14,7 @@ def parse_usage(usage):
        "prompt": usage["prompt_tokens"] if "prompt_tokens" in usage else 0,
    }

+
def parse_tool_calls(tool_calls):
    if tool_calls is None:
        return None
@@ -26,7 +27,7 @@ def parse_tool_calls(tool_calls):
            "function": {
                "name": tool_call.function.name,
                "arguments": tool_call.function.arguments,
-           }
+           },
        }

        return serialized
@@ -176,6 +177,5 @@ class LunaryLogger:
            )

        except:
-           # traceback.print_exc()
            print_verbose(f"Lunary Logging Error - {traceback.format_exc()}")
            pass
@@ -109,8 +109,8 @@ class PrometheusLogger:
                end_user_id, user_api_key, model, user_api_team, user_id
            ).inc()
        except Exception as e:
-           traceback.print_exc()
-           verbose_logger.debug(
-               f"prometheus Layer Error - {str(e)}\n{traceback.format_exc()}"
+           verbose_logger.error(
+               "prometheus Layer Error(): Exception occured - {}".format(str(e))
            )
+           verbose_logger.debug(traceback.format_exc())
            pass
@@ -180,6 +180,5 @@ class S3Logger:
            print_verbose(f"s3 Layer Logging - final response object: {response_obj}")
            return response
        except Exception as e:
-           traceback.print_exc()
            verbose_logger.debug(f"s3 Layer Error - {str(e)}\n{traceback.format_exc()}")
            pass
@@ -110,6 +110,5 @@ class Supabase:
            )

        except:
-           # traceback.print_exc()
            print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")
            pass
@@ -217,6 +217,5 @@ class WeightsBiasesLogger:
                f"W&B Logging Logging - final response object: {response_obj}"
            )
        except:
-           # traceback.print_exc()
            print_verbose(f"W&B Logging Layer Error - {traceback.format_exc()}")
            pass
@@ -1,13 +1,14 @@
-import os, types, traceback, copy, asyncio
-import json
-from enum import Enum
+import types
+import traceback
+import copy
import time
from typing import Callable, Optional
-from litellm.utils import ModelResponse, get_secret, Choices, Message, Usage
+from litellm.utils import ModelResponse, Choices, Message, Usage
import litellm
-import sys, httpx
+import httpx
from .prompt_templates.factory import prompt_factory, custom_prompt, get_system_prompt
from packaging.version import Version
+from litellm import verbose_logger


class GeminiError(Exception):
@@ -264,7 +265,8 @@ def completion(
            choices_list.append(choice_obj)
        model_response["choices"] = choices_list
    except Exception as e:
-       traceback.print_exc()
+       verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e)))
+       verbose_logger.debug(traceback.format_exc())
        raise GeminiError(
            message=traceback.format_exc(), status_code=response.status_code
        )
@@ -356,7 +358,8 @@ async def async_completion(
            choices_list.append(choice_obj)
        model_response["choices"] = choices_list
    except Exception as e:
-       traceback.print_exc()
+       verbose_logger.error("LiteLLM.gemini.py: Exception occured - {}".format(str(e)))
+       verbose_logger.debug(traceback.format_exc())
        raise GeminiError(
            message=traceback.format_exc(), status_code=response.status_code
        )
@@ -2,10 +2,12 @@ from itertools import chain
import requests, types, time  # type: ignore
import json, uuid
import traceback
-from typing import Optional
+from typing import Optional, List
import litellm
+from litellm.types.utils import ProviderField
import httpx, aiohttp, asyncio  # type: ignore
from .prompt_templates.factory import prompt_factory, custom_prompt
+from litellm import verbose_logger


class OllamaError(Exception):
@@ -124,6 +126,19 @@ class OllamaConfig:
            )
            and v is not None
        }

+   def get_required_params(self) -> List[ProviderField]:
+       """For a given provider, return it's required fields with a description"""
+       return [
+           ProviderField(
+               field_name="base_url",
+               field_type="string",
+               field_description="Your Ollama API Base",
+               field_value="http://10.10.11.249:11434",
+           )
+       ]
+
    def get_supported_openai_params(
        self,
    ):
@@ -138,10 +153,12 @@ class OllamaConfig:
            "response_format",
        ]


# ollama wants plain base64 jpeg/png files as images. strip any leading dataURI
# and convert to jpeg if necessary.
def _convert_image(image):
    import base64, io

    try:
        from PIL import Image
    except:
@@ -391,7 +408,13 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob
        async for transformed_chunk in streamwrapper:
            yield transformed_chunk
    except Exception as e:
-       traceback.print_exc()
+       verbose_logger.error(
+           "LiteLLM.ollama.py::ollama_async_streaming(): Exception occured - {}".format(
+               str(e)
+           )
+       )
+       verbose_logger.debug(traceback.format_exc())

        raise e
@@ -455,7 +478,12 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
        )
        return model_response
    except Exception as e:
-       traceback.print_exc()
+       verbose_logger.error(
+           "LiteLLM.ollama.py::ollama_acompletion(): Exception occured - {}".format(
+               str(e)
+           )
+       )
+       verbose_logger.debug(traceback.format_exc())
        raise e
@@ -1,11 +1,15 @@
from itertools import chain
-import requests, types, time
-import json, uuid
+import requests
+import types
+import time
+import json
+import uuid
import traceback
from typing import Optional
+from litellm import verbose_logger
import litellm
-import httpx, aiohttp, asyncio
-from .prompt_templates.factory import prompt_factory, custom_prompt
+import httpx
+import aiohttp


class OllamaError(Exception):
@@ -299,7 +303,10 @@ def get_ollama_response(
                tool_calls=[
                    {
                        "id": f"call_{str(uuid.uuid4())}",
-                       "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                       "function": {
+                           "name": function_call["name"],
+                           "arguments": json.dumps(function_call["arguments"]),
+                       },
                        "type": "function",
                    }
                ],
@@ -307,7 +314,9 @@ def get_ollama_response(
        model_response["choices"][0]["message"] = message
        model_response["choices"][0]["finish_reason"] = "tool_calls"
    else:
-       model_response["choices"][0]["message"]["content"] = response_json["message"]["content"]
+       model_response["choices"][0]["message"]["content"] = response_json["message"][
+           "content"
+       ]
    model_response["created"] = int(time.time())
    model_response["model"] = "ollama/" + model
    prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=messages))  # type: ignore
@@ -361,7 +370,10 @@ def ollama_completion_stream(url, api_key, data, logging_obj):
                tool_calls=[
                    {
                        "id": f"call_{str(uuid.uuid4())}",
-                       "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                       "function": {
+                           "name": function_call["name"],
+                           "arguments": json.dumps(function_call["arguments"]),
+                       },
                        "type": "function",
                    }
                ],
@@ -410,9 +422,10 @@ async def ollama_async_streaming(
            first_chunk_content = first_chunk.choices[0].delta.content or ""
            response_content = first_chunk_content + "".join(
                [
                    chunk.choices[0].delta.content
                    async for chunk in streamwrapper
-                   if chunk.choices[0].delta.content]
+                   if chunk.choices[0].delta.content
+               ]
            )
            function_call = json.loads(response_content)
            delta = litellm.utils.Delta(
@@ -420,7 +433,10 @@ async def ollama_async_streaming(
                tool_calls=[
                    {
                        "id": f"call_{str(uuid.uuid4())}",
-                       "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                       "function": {
+                           "name": function_call["name"],
+                           "arguments": json.dumps(function_call["arguments"]),
+                       },
                        "type": "function",
                    }
                ],
@@ -433,7 +449,8 @@ async def ollama_async_streaming(
        async for transformed_chunk in streamwrapper:
            yield transformed_chunk
    except Exception as e:
-       traceback.print_exc()
+       verbose_logger.error("LiteLLM.gemini(): Exception occured - {}".format(str(e)))
+       verbose_logger.debug(traceback.format_exc())


async def ollama_acompletion(
@@ -483,7 +500,10 @@ async def ollama_acompletion(
                tool_calls=[
                    {
                        "id": f"call_{str(uuid.uuid4())}",
-                       "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                       "function": {
+                           "name": function_call["name"],
+                           "arguments": json.dumps(function_call["arguments"]),
+                       },
                        "type": "function",
                    }
                ],
@@ -491,7 +511,9 @@ async def ollama_acompletion(
            model_response["choices"][0]["message"] = message
            model_response["choices"][0]["finish_reason"] = "tool_calls"
        else:
-           model_response["choices"][0]["message"]["content"] = response_json["message"]["content"]
+           model_response["choices"][0]["message"]["content"] = response_json[
+               "message"
+           ]["content"]

        model_response["created"] = int(time.time())
        model_response["model"] = "ollama_chat/" + data["model"]
@@ -509,5 +531,9 @@ async def ollama_acompletion(
        )
        return model_response
    except Exception as e:
-       traceback.print_exc()
+       verbose_logger.error(
+           "LiteLLM.ollama_acompletion(): Exception occured - {}".format(str(e))
+       )
+       verbose_logger.debug(traceback.format_exc())

        raise e
@@ -1,11 +1,12 @@
-import os, types, traceback, copy
-import json
-from enum import Enum
+import types
+import traceback
+import copy
import time
from typing import Callable, Optional
-from litellm.utils import ModelResponse, get_secret, Choices, Message, Usage
+from litellm.utils import ModelResponse, Choices, Message, Usage
import litellm
-import sys, httpx
+import httpx
+from litellm import verbose_logger


class PalmError(Exception):
@@ -165,7 +166,10 @@ def completion(
            choices_list.append(choice_obj)
        model_response["choices"] = choices_list
    except Exception as e:
-       traceback.print_exc()
+       verbose_logger.error(
+           "litellm.llms.palm.py::completion(): Exception occured - {}".format(str(e))
+       )
+       verbose_logger.debug(traceback.format_exc())
        raise PalmError(
            message=traceback.format_exc(), status_code=response.status_code
        )
@@ -826,7 +826,7 @@ def anthropic_messages_pt_xml(messages: list):
            )  # either string or none
            if messages[msg_i].get(
                "tool_calls", []
-           ):  # support assistant tool invoke convertion
+           ):  # support assistant tool invoke conversion
                assistant_text += convert_to_anthropic_tool_invoke_xml(  # type: ignore
                    messages[msg_i]["tool_calls"]
                )
@@ -1217,7 +1217,7 @@ def anthropic_messages_pt(messages: list):

            if messages[msg_i].get(
                "tool_calls", []
-           ):  # support assistant tool invoke convertion
+           ):  # support assistant tool invoke conversion
                assistant_content.extend(
                    convert_to_anthropic_tool_invoke(messages[msg_i]["tool_calls"])
                )
@@ -297,24 +297,29 @@ def _convert_gemini_role(role: str) -> Literal["user", "model"]:

def _process_gemini_image(image_url: str) -> PartType:
    try:
-       if "gs://" in image_url:
-           # Case 1: Images with Cloud Storage URIs
+       if ".mp4" in image_url and "gs://" in image_url:
+           # Case 1: Videos with Cloud Storage URIs
+           part_mime = "video/mp4"
+           _file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
+           return PartType(file_data=_file_data)
+       elif ".pdf" in image_url and "gs://" in image_url:
+           # Case 2: PDF's with Cloud Storage URIs
+           part_mime = "application/pdf"
+           _file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
+           return PartType(file_data=_file_data)
+       elif "gs://" in image_url:
+           # Case 3: Images with Cloud Storage URIs
            # The supported MIME types for images include image/png and image/jpeg.
            part_mime = "image/png" if "png" in image_url else "image/jpeg"
            _file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
            return PartType(file_data=_file_data)
        elif "https:/" in image_url:
-           # Case 2: Images with direct links
+           # Case 4: Images with direct links
            image = _load_image_from_url(image_url)
            _blob = BlobType(data=image.data, mime_type=image._mime_type)
            return PartType(inline_data=_blob)
-       elif ".mp4" in image_url and "gs://" in image_url:
-           # Case 3: Videos with Cloud Storage URIs
-           part_mime = "video/mp4"
-           _file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
-           return PartType(file_data=_file_data)
        elif "base64" in image_url:
-           # Case 4: Images with base64 encoding
+           # Case 5: Images with base64 encoding
            import base64, re

            # base 64 is passed as data:image/jpeg;base64,<base-64-encoded-image>
@@ -390,7 +395,7 @@ def _gemini_convert_messages_with_history(messages: list) -> List[ContentType]:
            assistant_content.extend(_parts)
        elif messages[msg_i].get(
            "tool_calls", []
-       ):  # support assistant tool invoke convertion
+       ):  # support assistant tool invoke conversion
            assistant_content.extend(
                convert_to_gemini_tool_call_invoke(messages[msg_i]["tool_calls"])
            )
@@ -642,9 +647,9 @@ def completion(

        prompt = " ".join(
            [
-               message["content"]
+               message.get("content")
                for message in messages
-               if isinstance(message["content"], str)
+               if isinstance(message.get("content", None), str)
            ]
        )
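Given the reordered `gs://` handling above (`.mp4` → `video/mp4`, `.pdf` → `application/pdf`, otherwise an image MIME type), a request can reference a Cloud Storage file directly. This is a hedged sketch only; the model name, bucket path, and OpenAI-style content-part shape are assumptions for illustration, not taken from this diff:

```python
# Hedged sketch: pass a Cloud Storage PDF to a Vertex AI Gemini model via litellm.
# _process_gemini_image() (above) is expected to map the gs:// URI to application/pdf.
import litellm

response = litellm.completion(
    model="vertex_ai/gemini-1.5-pro",  # assumed model name
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Summarize this document."},
                {"type": "image_url", "image_url": {"url": "gs://my-bucket/report.pdf"}},
            ],
        }
    ],
)
print(response)
```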
@@ -365,7 +365,10 @@ async def acompletion(
        )  # sets the logging event loop if the user does sync streaming (e.g. on proxy for sagemaker calls)
        return response
    except Exception as e:
-       traceback.print_exc()
+       verbose_logger.error(
+           "litellm.acompletion(): Exception occured - {}".format(str(e))
+       )
+       verbose_logger.debug(traceback.format_exc())
        custom_llm_provider = custom_llm_provider or "openai"
        raise exception_type(
            model=model,
@@ -478,7 +481,10 @@ def mock_completion(
    except Exception as e:
        if isinstance(e, openai.APIError):
            raise e
-       traceback.print_exc()
+       verbose_logger.error(
+           "litellm.mock_completion(): Exception occured - {}".format(str(e))
+       )
+       verbose_logger.debug(traceback.format_exc())
        raise Exception("Mock completion response failed")
@@ -4449,7 +4455,10 @@ async def ahealth_check(
        response = {}  # args like remaining ratelimit etc.
        return response
    except Exception as e:
-       traceback.print_exc()
+       verbose_logger.error(
+           "litellm.ahealth_check(): Exception occured - {}".format(str(e))
+       )
+       verbose_logger.debug(traceback.format_exc())
        stack_trace = traceback.format_exc()
        if isinstance(stack_trace, str):
            stack_trace = stack_trace[:1000]
@@ -1,6 +1,7 @@
import json
import logging
from logging import Formatter
+import sys


class JsonFormatter(Formatter):
@@ -56,8 +56,10 @@ router_settings:

litellm_settings:
  success_callback: ["langfuse"]
-  json_logs: true

general_settings:
  alerting: ["email"]
+  key_management_system: "aws_kms"
+  key_management_settings:
+    hosted_keys: ["LITELLM_MASTER_KEY"]
@@ -76,6 +76,17 @@ class LitellmUserRoles(str, enum.Enum):
        return ui_labels.get(self.value, "")


+class LitellmTableNames(str, enum.Enum):
+   """
+   Enum for Table Names used by LiteLLM
+   """
+
+   TEAM_TABLE_NAME: str = "LiteLLM_TeamTable"
+   USER_TABLE_NAME: str = "LiteLLM_UserTable"
+   KEY_TABLE_NAME: str = "LiteLLM_VerificationToken"
+   PROXY_MODEL_TABLE_NAME: str = "LiteLLM_ModelTable"
+
+
AlertType = Literal[
    "llm_exceptions",
    "llm_too_slow",
@@ -935,6 +946,7 @@ class KeyManagementSystem(enum.Enum):
    AZURE_KEY_VAULT = "azure_key_vault"
    AWS_SECRET_MANAGER = "aws_secret_manager"
    LOCAL = "local"
+   AWS_KMS = "aws_kms"


class KeyManagementSettings(LiteLLMBase):
@@ -1276,6 +1288,22 @@ class LiteLLM_ErrorLogs(LiteLLMBase):
    endTime: Union[str, datetime, None]


+class LiteLLM_AuditLogs(LiteLLMBase):
+   id: str
+   updated_at: datetime
+   changed_by: str
+   action: Literal["created", "updated", "deleted"]
+   table_name: Literal[
+       LitellmTableNames.TEAM_TABLE_NAME,
+       LitellmTableNames.USER_TABLE_NAME,
+       LitellmTableNames.KEY_TABLE_NAME,
+       LitellmTableNames.PROXY_MODEL_TABLE_NAME,
+   ]
+   object_id: str
+   before_value: Optional[Json] = None
+   updated_values: Optional[Json] = None
+
+
class LiteLLM_SpendLogs_ResponseObject(LiteLLMBase):
    response: Optional[List[Union[LiteLLM_SpendLogs, Any]]] = None
@@ -88,7 +88,7 @@ class _PROXY_AzureContentSafety(
            verbose_proxy_logger.debug(
                "Error in Azure Content-Safety: %s", traceback.format_exc()
            )
-           traceback.print_exc()
+           verbose_proxy_logger.debug(traceback.format_exc())
            raise

        result = self._compute_result(response)
@@ -123,7 +123,12 @@ class _PROXY_AzureContentSafety(
        except HTTPException as e:
            raise e
        except Exception as e:
-           traceback.print_exc()
+           verbose_proxy_logger.error(
+               "litellm.proxy.hooks.azure_content_safety.py::async_pre_call_hook(): Exception occured - {}".format(
+                   str(e)
+               )
+           )
+           verbose_proxy_logger.debug(traceback.format_exc())

    async def async_post_call_success_hook(
        self,
@@ -94,7 +94,12 @@ class _PROXY_BatchRedisRequests(CustomLogger):
        except HTTPException as e:
            raise e
        except Exception as e:
-           traceback.print_exc()
+           verbose_proxy_logger.error(
+               "litellm.proxy.hooks.batch_redis_get.py::async_pre_call_hook(): Exception occured - {}".format(
+                   str(e)
+               )
+           )
+           verbose_proxy_logger.debug(traceback.format_exc())

    async def async_get_cache(self, *args, **kwargs):
        """
@ -1,13 +1,13 @@
|
||||||
# What this does?
|
# What this does?
|
||||||
## Checks if key is allowed to use the cache controls passed in to the completion() call
|
## Checks if key is allowed to use the cache controls passed in to the completion() call
|
||||||
|
|
||||||
from typing import Optional
|
|
||||||
import litellm
|
import litellm
|
||||||
|
from litellm import verbose_logger
|
||||||
from litellm.caching import DualCache
|
from litellm.caching import DualCache
|
||||||
from litellm.proxy._types import UserAPIKeyAuth
|
from litellm.proxy._types import UserAPIKeyAuth
|
||||||
from litellm.integrations.custom_logger import CustomLogger
|
from litellm.integrations.custom_logger import CustomLogger
|
||||||
from fastapi import HTTPException
|
from fastapi import HTTPException
|
||||||
import json, traceback
|
import traceback
|
||||||
|
|
||||||
|
|
||||||
class _PROXY_CacheControlCheck(CustomLogger):
|
class _PROXY_CacheControlCheck(CustomLogger):
|
||||||
|
@ -54,4 +54,9 @@ class _PROXY_CacheControlCheck(CustomLogger):
|
||||||
except HTTPException as e:
|
except HTTPException as e:
|
||||||
raise e
|
raise e
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
traceback.print_exc()
|
verbose_logger.error(
|
||||||
|
"litellm.proxy.hooks.cache_control_check.py::async_pre_call_hook(): Exception occured - {}".format(
|
||||||
|
str(e)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
verbose_logger.debug(traceback.format_exc())
|
||||||
|
|
|
@@ -1,10 +1,10 @@
-from typing import Optional
+from litellm import verbose_logger
import litellm
from litellm.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
-import json, traceback
+import traceback


class _PROXY_MaxBudgetLimiter(CustomLogger):
@@ -44,4 +44,9 @@ class _PROXY_MaxBudgetLimiter(CustomLogger):
        except HTTPException as e:
            raise e
        except Exception as e:
-           traceback.print_exc()
+           verbose_logger.error(
+               "litellm.proxy.hooks.max_budget_limiter.py::async_pre_call_hook(): Exception occured - {}".format(
+                   str(e)
+               )
+           )
+           verbose_logger.debug(traceback.format_exc())
@ -8,8 +8,8 @@
|
||||||
# Tell us how we can improve! - Krrish & Ishaan
|
# Tell us how we can improve! - Krrish & Ishaan
|
||||||
|
|
||||||
|
|
||||||
from typing import Optional, Literal, Union
|
from typing import Optional, Union
|
||||||
import litellm, traceback, sys, uuid, json
|
import litellm, traceback, uuid, json # noqa: E401
|
||||||
from litellm.caching import DualCache
|
from litellm.caching import DualCache
|
||||||
from litellm.proxy._types import UserAPIKeyAuth
|
from litellm.proxy._types import UserAPIKeyAuth
|
||||||
from litellm.integrations.custom_logger import CustomLogger
|
from litellm.integrations.custom_logger import CustomLogger
|
||||||
|
@ -21,8 +21,8 @@ from litellm.utils import (
|
||||||
ImageResponse,
|
ImageResponse,
|
||||||
StreamingChoices,
|
StreamingChoices,
|
||||||
)
|
)
|
||||||
from datetime import datetime
|
import aiohttp
|
||||||
import aiohttp, asyncio
|
import asyncio
|
||||||
|
|
||||||
|
|
||||||
class _OPTIONAL_PresidioPIIMasking(CustomLogger):
|
class _OPTIONAL_PresidioPIIMasking(CustomLogger):
|
||||||
|
@ -138,7 +138,12 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
|
||||||
else:
|
else:
|
||||||
raise Exception(f"Invalid anonymizer response: {redacted_text}")
|
raise Exception(f"Invalid anonymizer response: {redacted_text}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
traceback.print_exc()
|
verbose_proxy_logger.error(
|
||||||
|
"litellm.proxy.hooks.presidio_pii_masking.py::async_pre_call_hook(): Exception occured - {}".format(
|
||||||
|
str(e)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
verbose_proxy_logger.debug(traceback.format_exc())
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
async def async_pre_call_hook(
|
async def async_pre_call_hook(
|
||||||
|
|
|
@ -204,7 +204,12 @@ class _OPTIONAL_PromptInjectionDetection(CustomLogger):
|
||||||
return e.detail["error"]
|
return e.detail["error"]
|
||||||
raise e
|
raise e
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
traceback.print_exc()
|
verbose_proxy_logger.error(
|
||||||
|
"litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
|
||||||
|
str(e)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
verbose_proxy_logger.debug(traceback.format_exc())
|
||||||
|
|
||||||
async def async_moderation_hook(
|
async def async_moderation_hook(
|
||||||
self,
|
self,
|
||||||
|
|
|
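Taken together, these hook hunks converge on a single logging pattern: the exception summary goes to the module's verbose logger at ERROR level, while the full traceback is demoted to DEBUG instead of being printed to stderr. A minimal standalone sketch of that pattern (the helper name and the module string argument are illustrative, not part of this commit; the logger import is the one the diff itself uses):

```python
import traceback

from litellm._logging import verbose_proxy_logger


def log_hook_exception(module_path: str, e: Exception) -> None:
    # Summary at ERROR so it always reaches operators.
    verbose_proxy_logger.error(
        "{}::async_pre_call_hook(): Exception occured - {}".format(module_path, str(e))
    )
    # Full traceback only at DEBUG, replacing the old traceback.print_exc().
    verbose_proxy_logger.debug(traceback.format_exc())
```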
@@ -24,3 +24,4 @@ general_settings:

 litellm_settings:
   callbacks: ["otel"]
+  store_audit_logs: true
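The config key above maps to a module-level flag; the audit-log branches added later in proxy_server.py gate on it. A short sketch of toggling it directly in Python (useful in tests), matching the in-code comments "Enable with litellm.store_audit_logs = True":

```python
import litellm

# Mirror of `litellm_settings: store_audit_logs: true` from the proxy config.
# The new key/team endpoints only emit LiteLLM_AuditLogs rows when this is True.
litellm.store_audit_logs = True
```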
@@ -103,6 +103,7 @@ from litellm.proxy.utils import (
     update_spend,
     encrypt_value,
     decrypt_value,
+    get_error_message_str,
 )
 from litellm import (
     CreateBatchRequest,

@@ -112,7 +113,10 @@ from litellm import (
     CreateFileRequest,
 )
 from litellm.proxy.secret_managers.google_kms import load_google_kms
-from litellm.proxy.secret_managers.aws_secret_manager import load_aws_secret_manager
+from litellm.proxy.secret_managers.aws_secret_manager import (
+    load_aws_secret_manager,
+    load_aws_kms,
+)
 import pydantic
 from litellm.proxy._types import *
 from litellm.caching import DualCache, RedisCache

@@ -125,7 +129,10 @@ from litellm.router import (
     AssistantsTypedDict,
 )
 from litellm.router import ModelInfo as RouterModelInfo
-from litellm._logging import verbose_router_logger, verbose_proxy_logger
+from litellm._logging import (
+    verbose_router_logger,
+    verbose_proxy_logger,
+)
 from litellm.proxy.auth.handle_jwt import JWTHandler
 from litellm.proxy.auth.litellm_license import LicenseCheck
 from litellm.proxy.auth.model_checks import (

@@ -1471,7 +1478,12 @@ async def user_api_key_auth(
         else:
             raise Exception()
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.user_api_key_auth(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, litellm.BudgetExceededError):
             raise ProxyException(
                 message=e.message, type="auth_error", param=None, code=400

@@ -2736,10 +2748,12 @@ class ProxyConfig:
                         load_google_kms(use_google_kms=True)
                     elif (
                         key_management_system
-                        == KeyManagementSystem.AWS_SECRET_MANAGER.value
+                        == KeyManagementSystem.AWS_SECRET_MANAGER.value  # noqa: F405
                     ):
                         ### LOAD FROM AWS SECRET MANAGER ###
                         load_aws_secret_manager(use_aws_secret_manager=True)
+                    elif key_management_system == KeyManagementSystem.AWS_KMS.value:
+                        load_aws_kms(use_aws_kms=True)
                     else:
                         raise ValueError("Invalid Key Management System selected")
                 key_management_settings = general_settings.get(

@@ -2773,6 +2787,7 @@ class ProxyConfig:
             master_key = general_settings.get(
                 "master_key", litellm.get_secret("LITELLM_MASTER_KEY", None)
             )

             if master_key and master_key.startswith("os.environ/"):
                 master_key = litellm.get_secret(master_key)
             if not isinstance(master_key, str):
@@ -3476,7 +3491,12 @@ async def generate_key_helper_fn(
         )
         key_data["token_id"] = getattr(create_key_response, "token", None)
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.generate_key_helper_fn(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise e
         raise HTTPException(

@@ -3515,7 +3535,12 @@ async def delete_verification_token(tokens: List, user_id: Optional[str] = None):
         else:
             raise Exception("DB not connected. prisma_client is None")
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.delete_verification_token(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         raise e
     return deleted_tokens

@@ -3676,7 +3701,12 @@ async def async_assistants_data_generator(
             done_message = "[DONE]"
             yield f"data: {done_message}\n\n"
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.async_assistants_data_generator(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict,
             original_exception=e,

@@ -3686,9 +3716,6 @@ async def async_assistants_data_generator(
             f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`"
         )
         router_model_names = llm_router.model_names if llm_router is not None else []
-        if user_debug:
-            traceback.print_exc()
-
         if isinstance(e, HTTPException):
             raise e
         else:

@@ -3728,7 +3755,12 @@ async def async_data_generator(
             done_message = "[DONE]"
             yield f"data: {done_message}\n\n"
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.async_data_generator(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict,
             original_exception=e,

@@ -3738,8 +3770,6 @@ async def async_data_generator(
             f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`"
        )
         router_model_names = llm_router.model_names if llm_router is not None else []
-        if user_debug:
-            traceback.print_exc()

         if isinstance(e, HTTPException):
             raise e
@@ -3800,6 +3830,18 @@ def on_backoff(details):
     verbose_proxy_logger.debug("Backing off... this was attempt # %s", details["tries"])


+def giveup(e):
+    result = not (
+        isinstance(e, ProxyException)
+        and getattr(e, "message", None) is not None
+        and isinstance(e.message, str)
+        and "Max parallel request limit reached" in e.message
+    )
+    if result:
+        verbose_proxy_logger.info(json.dumps({"event": "giveup", "exception": str(e)}))
+    return result
+
+
 @router.on_event("startup")
 async def startup_event():
     global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time, litellm_proxy_admin_name, db_writer_client, store_model_in_db
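The new module-level giveup predicate replaces the inline lambda that was previously passed to the backoff decorator (see the next hunk), and it now also logs why a retry sequence is abandoned. A hedged sketch of how such a predicate plugs into the backoff library, with a stand-in exception type and a deliberately flaky function since the real decorated endpoint is chat_completion:

```python
import backoff


class ProxyException(Exception):
    """Stand-in for litellm.proxy._types.ProxyException."""

    def __init__(self, message: str):
        super().__init__(message)
        self.message = message


def giveup(e):
    # Keep retrying only rate-limit style ProxyExceptions; give up on everything else.
    return not (
        isinstance(e, ProxyException)
        and isinstance(getattr(e, "message", None), str)
        and "Max parallel request limit reached" in e.message
    )


@backoff.on_exception(
    backoff.expo,  # exponential wait between attempts
    Exception,     # retry on any exception unless giveup() says stop
    max_tries=3,
    giveup=giveup,
)
def flaky_call():
    # Raising the "retryable" message means backoff keeps trying up to max_tries.
    raise ProxyException("Max parallel request limit reached")
```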
@@ -4084,12 +4126,8 @@ def model_list(
     max_tries=litellm.num_retries or 3,  # maximum number of retries
     max_time=litellm.request_timeout or 60,  # maximum total time to retry for
     on_backoff=on_backoff,  # specifying the function to call on backoff
-    giveup=lambda e: not (
-        isinstance(e, ProxyException)
-        and getattr(e, "message", None) is not None
-        and isinstance(e.message, str)
-        and "Max parallel request limit reached" in e.message
-    ),  # the result of the logical expression is on the second position
+    giveup=giveup,
+    logger=verbose_proxy_logger,
 )
 async def chat_completion(
     request: Request,

@@ -4098,6 +4136,7 @@ async def chat_completion(
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):
     global general_settings, user_debug, proxy_logging_obj, llm_model_list

     data = {}
     try:
         body = await request.body()

@@ -4386,7 +4425,12 @@ async def chat_completion(
         return _chat_response
     except Exception as e:
         data["litellm_status"] = "fail"  # used for alerting
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.chat_completion(): Exception occured - {}".format(
+                get_error_message_str(e=e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )

@@ -4397,8 +4441,6 @@ async def chat_completion(
             litellm_debug_info,
         )
         router_model_names = llm_router.model_names if llm_router is not None else []
-        if user_debug:
-            traceback.print_exc()

         if isinstance(e, HTTPException):
             raise ProxyException(
@@ -4630,15 +4672,12 @@ async def completion(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        verbose_proxy_logger.debug("EXCEPTION RAISED IN PROXY MAIN.PY")
-        litellm_debug_info = getattr(e, "litellm_debug_info", "")
-        verbose_proxy_logger.debug(
-            "\033[1;31mAn error occurred: %s %s\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`",
-            e,
-            litellm_debug_info,
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.completion(): Exception occured - {}".format(
+                str(e)
+            )
         )
-        traceback.print_exc()
-        error_traceback = traceback.format_exc()
+        verbose_proxy_logger.debug(traceback.format_exc())
         error_msg = f"{str(e)}"
         raise ProxyException(
             message=getattr(e, "message", error_msg),
@@ -4848,7 +4887,12 @@ async def embeddings(
             e,
             litellm_debug_info,
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.embeddings(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e)),

@@ -5027,7 +5071,12 @@ async def image_generation(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.image_generation(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e)),

@@ -5205,7 +5254,12 @@ async def audio_speech(
         )

     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.audio_speech(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         raise e

@@ -5394,7 +5448,12 @@ async def audio_transcriptions(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.audio_transcription(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e.detail)),

@@ -5403,7 +5462,6 @@ async def audio_transcriptions(
                 code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
             )
         else:
-            error_traceback = traceback.format_exc()
             error_msg = f"{str(e)}"
             raise ProxyException(
                 message=getattr(e, "message", error_msg),
@@ -5531,7 +5589,12 @@ async def get_assistants(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.get_assistants(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e.detail)),

@@ -5540,7 +5603,6 @@ async def get_assistants(
                 code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
             )
         else:
-            error_traceback = traceback.format_exc()
             error_msg = f"{str(e)}"
             raise ProxyException(
                 message=getattr(e, "message", error_msg),

@@ -5660,7 +5722,12 @@ async def create_threads(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.create_threads(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e.detail)),

@@ -5669,7 +5736,6 @@ async def create_threads(
                 code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
             )
         else:
-            error_traceback = traceback.format_exc()
             error_msg = f"{str(e)}"
             raise ProxyException(
                 message=getattr(e, "message", error_msg),

@@ -5788,7 +5854,12 @@ async def get_thread(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.get_thread(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e.detail)),

@@ -5797,7 +5868,6 @@ async def get_thread(
                 code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
             )
         else:
-            error_traceback = traceback.format_exc()
             error_msg = f"{str(e)}"
             raise ProxyException(
                 message=getattr(e, "message", error_msg),

@@ -5919,7 +5989,12 @@ async def add_messages(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.add_messages(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e.detail)),

@@ -5928,7 +6003,6 @@ async def add_messages(
                 code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
             )
         else:
-            error_traceback = traceback.format_exc()
             error_msg = f"{str(e)}"
             raise ProxyException(
                 message=getattr(e, "message", error_msg),

@@ -6046,7 +6120,12 @@ async def get_messages(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.get_messages(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e.detail)),

@@ -6055,7 +6134,6 @@ async def get_messages(
                 code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
             )
         else:
-            error_traceback = traceback.format_exc()
             error_msg = f"{str(e)}"
             raise ProxyException(
                 message=getattr(e, "message", error_msg),

@@ -6187,7 +6265,12 @@ async def run_thread(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.run_thread(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e.detail)),

@@ -6196,7 +6279,6 @@ async def run_thread(
                 code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
             )
         else:
-            error_traceback = traceback.format_exc()
             error_msg = f"{str(e)}"
             raise ProxyException(
                 message=getattr(e, "message", error_msg),
@@ -6335,7 +6417,12 @@ async def create_batch(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.create_batch(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e.detail)),

@@ -6344,7 +6431,6 @@ async def create_batch(
                 code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
             )
         else:
-            error_traceback = traceback.format_exc()
             error_msg = f"{str(e)}"
             raise ProxyException(
                 message=getattr(e, "message", error_msg),

@@ -6478,7 +6564,12 @@ async def retrieve_batch(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.retrieve_batch(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e.detail)),

@@ -6631,7 +6722,12 @@ async def create_file(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.create_file(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e.detail)),

@@ -6640,7 +6736,6 @@ async def create_file(
                 code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
             )
         else:
-            error_traceback = traceback.format_exc()
             error_msg = f"{str(e)}"
             raise ProxyException(
                 message=getattr(e, "message", error_msg),

@@ -6816,7 +6911,12 @@ async def moderations(
         await proxy_logging_obj.post_call_failure_hook(
             user_api_key_dict=user_api_key_dict, original_exception=e, request_data=data
         )
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.moderations(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "message", str(e)),

@@ -6825,7 +6925,6 @@ async def moderations(
                 code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
             )
         else:
-            error_traceback = traceback.format_exc()
             error_msg = f"{str(e)}"
             raise ProxyException(
                 message=getattr(e, "message", error_msg),
@@ -7115,9 +7214,33 @@ async def generate_key_fn(
             )
         )

+        # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
+        if litellm.store_audit_logs is True:
+            _updated_values = json.dumps(response)
+            asyncio.create_task(
+                create_audit_log_for_update(
+                    request_data=LiteLLM_AuditLogs(
+                        id=str(uuid.uuid4()),
+                        updated_at=datetime.now(timezone.utc),
+                        changed_by=user_api_key_dict.user_id
+                        or litellm_proxy_admin_name,
+                        table_name=LitellmTableNames.KEY_TABLE_NAME,
+                        object_id=response.get("token_id", ""),
+                        action="created",
+                        updated_values=_updated_values,
+                        before_value=None,
+                    )
+                )
+            )
+
         return GenerateKeyResponse(**response)
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.generate_key_fn(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "detail", f"Authentication Error({str(e)})"),
@@ -7138,7 +7261,11 @@ async def generate_key_fn(
 @router.post(
     "/key/update", tags=["key management"], dependencies=[Depends(user_api_key_auth)]
 )
-async def update_key_fn(request: Request, data: UpdateKeyRequest):
+async def update_key_fn(
+    request: Request,
+    data: UpdateKeyRequest,
+    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+):
     """
     Update an existing key
     """

@@ -7150,6 +7277,16 @@ async def update_key_fn(request: Request, data: UpdateKeyRequest):
         if prisma_client is None:
             raise Exception("Not connected to DB!")

+        existing_key_row = await prisma_client.get_data(
+            token=data.key, table_name="key", query_type="find_unique"
+        )
+
+        if existing_key_row is None:
+            raise HTTPException(
+                status_code=404,
+                detail={"error": f"Team not found, passed team_id={data.team_id}"},
+            )
+
         # get non default values for key
         non_default_values = {}
         for k, v in data_json.items():

@@ -7176,6 +7313,29 @@ async def update_key_fn(request: Request, data: UpdateKeyRequest):
             hashed_token = hash_token(key)
             user_api_key_cache.delete_cache(hashed_token)

+            # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
+            if litellm.store_audit_logs is True:
+                _updated_values = json.dumps(data_json)
+
+                _before_value = existing_key_row.json(exclude_none=True)
+                _before_value = json.dumps(_before_value)
+
+                asyncio.create_task(
+                    create_audit_log_for_update(
+                        request_data=LiteLLM_AuditLogs(
+                            id=str(uuid.uuid4()),
+                            updated_at=datetime.now(timezone.utc),
+                            changed_by=user_api_key_dict.user_id
+                            or litellm_proxy_admin_name,
+                            table_name=LitellmTableNames.KEY_TABLE_NAME,
+                            object_id=data.key,
+                            action="updated",
+                            updated_values=_updated_values,
+                            before_value=_before_value,
+                        )
+                    )
+                )
+
         return {"key": key, **response["data"]}
     # update based on remaining passed in values
     except Exception as e:
@@ -7238,6 +7398,34 @@ async def delete_key_fn(
 ):
     user_id = None  # unless they're admin

+    # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
+    # we do this after the first for loop, since first for loop is for validation. we only want this inserted after validation passes
+    if litellm.store_audit_logs is True:
+        # make an audit log for each team deleted
+        for key in data.keys:
+            key_row = await prisma_client.get_data(  # type: ignore
+                token=key, table_name="key", query_type="find_unique"
+            )
+
+            key_row = key_row.json(exclude_none=True)
+            _key_row = json.dumps(key_row)
+
+            asyncio.create_task(
+                create_audit_log_for_update(
+                    request_data=LiteLLM_AuditLogs(
+                        id=str(uuid.uuid4()),
+                        updated_at=datetime.now(timezone.utc),
+                        changed_by=user_api_key_dict.user_id
+                        or litellm_proxy_admin_name,
+                        table_name=LitellmTableNames.KEY_TABLE_NAME,
+                        object_id=key,
+                        action="deleted",
+                        updated_values="{}",
+                        before_value=_key_row,
+                    )
+                )
+            )
+
     number_deleted_keys = await delete_verification_token(
         tokens=keys, user_id=user_id
     )
@@ -9507,7 +9695,12 @@ async def user_info(
         }
         return response_data
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.user_info(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "detail", f"Authentication Error({str(e)})"),

@@ -9602,7 +9795,12 @@ async def user_update(data: UpdateUserRequest):
         return response
     # update based on remaining passed in values
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.user_update(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "detail", f"Authentication Error({str(e)})"),

@@ -9655,7 +9853,12 @@ async def user_request_model(request: Request):
         return {"status": "success"}
     # update based on remaining passed in values
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.user_request_model(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "detail", f"Authentication Error({str(e)})"),

@@ -9697,7 +9900,12 @@ async def user_get_requests():
         return {"requests": response}
     # update based on remaining passed in values
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.user_get_requests(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "detail", f"Authentication Error({str(e)})"),

@@ -10087,7 +10295,12 @@ async def update_end_user(

     # update based on remaining passed in values
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.update_end_user(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "detail", f"Internal Server Error({str(e)})"),

@@ -10171,7 +10384,12 @@ async def delete_end_user(

     # update based on remaining passed in values
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.delete_end_user(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "detail", f"Internal Server Error({str(e)})"),
@@ -10365,12 +10583,65 @@ async def new_team(
                 }
             },
         )

+    # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
+    if litellm.store_audit_logs is True:
+        _updated_values = complete_team_data.json(exclude_none=True)
+        _updated_values = json.dumps(_updated_values)
+
+        asyncio.create_task(
+            create_audit_log_for_update(
+                request_data=LiteLLM_AuditLogs(
+                    id=str(uuid.uuid4()),
+                    updated_at=datetime.now(timezone.utc),
+                    changed_by=user_api_key_dict.user_id or litellm_proxy_admin_name,
+                    table_name=LitellmTableNames.TEAM_TABLE_NAME,
+                    object_id=data.team_id,
+                    action="created",
+                    updated_values=_updated_values,
+                    before_value=None,
+                )
+            )
+        )
+
     try:
         return team_row.model_dump()
     except Exception as e:
         return team_row.dict()


+async def create_audit_log_for_update(request_data: LiteLLM_AuditLogs):
+    if premium_user is not True:
+        return
+
+    if litellm.store_audit_logs is not True:
+        return
+    if prisma_client is None:
+        raise Exception("prisma_client is None, no DB connected")
+
+    verbose_proxy_logger.debug("creating audit log for %s", request_data)
+
+    if isinstance(request_data.updated_values, dict):
+        request_data.updated_values = json.dumps(request_data.updated_values)
+
+    if isinstance(request_data.before_value, dict):
+        request_data.before_value = json.dumps(request_data.before_value)
+
+    _request_data = request_data.dict(exclude_none=True)
+
+    try:
+        await prisma_client.db.litellm_auditlog.create(
+            data={
+                **_request_data,  # type: ignore
+            }
+        )
+    except Exception as e:
+        # [Non-Blocking Exception. Do not allow blocking LLM API call]
+        verbose_proxy_logger.error(f"Failed Creating audit log {e}")
+
+    return
+
+
 @router.post(
     "/team/update", tags=["team management"], dependencies=[Depends(user_api_key_auth)]
 )
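create_audit_log_for_update is always scheduled with asyncio.create_task so an audit write (or its failure) cannot delay or block the API response. A small self-contained sketch of that fire-and-forget pattern, using stand-in functions rather than the proxy's real helper and database client:

```python
import asyncio


async def create_audit_log_for_update(request_data: dict) -> None:
    # Stand-in for the proxy helper: pretend the DB write takes a while.
    await asyncio.sleep(0.5)
    print("audit row written:", request_data["action"], request_data["object_id"])


async def handle_request() -> str:
    # Fire-and-forget, mirroring the proxy: the response is not delayed by the
    # audit write, and an exception inside the task cannot block the caller.
    asyncio.create_task(
        create_audit_log_for_update({"action": "updated", "object_id": "key-123"})
    )
    return "response sent"


async def main() -> None:
    print(await handle_request())
    await asyncio.sleep(1)  # keep the loop alive so the audit task can finish


asyncio.run(main())
```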
@@ -10443,6 +10714,27 @@ async def update_team(
         team_id=data.team_id,
     )

+    # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
+    if litellm.store_audit_logs is True:
+        _before_value = existing_team_row.json(exclude_none=True)
+        _before_value = json.dumps(_before_value)
+        _after_value: str = json.dumps(updated_kv)
+
+        asyncio.create_task(
+            create_audit_log_for_update(
+                request_data=LiteLLM_AuditLogs(
+                    id=str(uuid.uuid4()),
+                    updated_at=datetime.now(timezone.utc),
+                    changed_by=user_api_key_dict.user_id or litellm_proxy_admin_name,
+                    table_name=LitellmTableNames.TEAM_TABLE_NAME,
+                    object_id=data.team_id,
+                    action="updated",
+                    updated_values=_after_value,
+                    before_value=_before_value,
+                )
+            )
+        )
+
     return team_row

@@ -10714,6 +11006,35 @@ async def delete_team(
                 detail={"error": f"Team not found, passed team_id={team_id}"},
             )

+    # Enterprise Feature - Audit Logging. Enable with litellm.store_audit_logs = True
+    # we do this after the first for loop, since first for loop is for validation. we only want this inserted after validation passes
+    if litellm.store_audit_logs is True:
+        # make an audit log for each team deleted
+        for team_id in data.team_ids:
+            team_row = await prisma_client.get_data(  # type: ignore
+                team_id=team_id, table_name="team", query_type="find_unique"
+            )
+
+            _team_row = team_row.json(exclude_none=True)
+
+            asyncio.create_task(
+                create_audit_log_for_update(
+                    request_data=LiteLLM_AuditLogs(
+                        id=str(uuid.uuid4()),
+                        updated_at=datetime.now(timezone.utc),
+                        changed_by=user_api_key_dict.user_id
+                        or litellm_proxy_admin_name,
+                        table_name=LitellmTableNames.TEAM_TABLE_NAME,
+                        object_id=team_id,
+                        action="deleted",
+                        updated_values="{}",
+                        before_value=_team_row,
+                    )
+                )
+            )
+
+    # End of Audit logging
+
     ## DELETE ASSOCIATED KEYS
     await prisma_client.delete_data(team_id_list=data.team_ids, table_name="key")
     ## DELETE TEAMS
@@ -11371,7 +11692,12 @@ async def add_new_model(
         return model_response

     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.add_new_model(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "detail", f"Authentication Error({str(e)})"),

@@ -11485,7 +11811,12 @@ async def update_model(

         return model_response
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.update_model(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "detail", f"Authentication Error({str(e)})"),

@@ -13719,7 +14050,12 @@ async def update_config(config_info: ConfigYAML):

         return {"message": "Config updated successfully"}
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.update_config(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "detail", f"Authentication Error({str(e)})"),

@@ -14192,7 +14528,12 @@ async def get_config():
             "available_callbacks": all_available_callbacks,
         }
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.get_config(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "detail", f"Authentication Error({str(e)})"),

@@ -14443,7 +14784,12 @@ async def health_services_endpoint(
         }

    except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.health_services_endpoint(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         if isinstance(e, HTTPException):
             raise ProxyException(
                 message=getattr(e, "detail", f"Authentication Error({str(e)})"),

@@ -14522,7 +14868,12 @@ async def health_endpoint(
             "unhealthy_count": len(unhealthy_endpoints),
         }
     except Exception as e:
-        traceback.print_exc()
+        verbose_proxy_logger.error(
+            "litellm.proxy.proxy_server.py::health_endpoint(): Exception occured - {}".format(
+                str(e)
+            )
+        )
+        verbose_proxy_logger.debug(traceback.format_exc())
         raise e

@@ -244,3 +244,15 @@ model LiteLLM_InvitationLink {
   liteLLM_user_table_created LiteLLM_UserTable @relation("CreatedBy", fields: [created_by], references: [user_id])
   liteLLM_user_table_updated LiteLLM_UserTable @relation("UpdatedBy", fields: [updated_by], references: [user_id])
 }
+
+
+model LiteLLM_AuditLog {
+  id             String   @id @default(uuid())
+  updated_at     DateTime @default(now())
+  changed_by     String   // user or system that performed the action
+  action         String   // create, update, delete
+  table_name     String   // on of LitellmTableNames.TEAM_TABLE_NAME, LitellmTableNames.USER_TABLE_NAME, LitellmTableNames.PROXY_MODEL_TABLE_NAME,
+  object_id      String   // id of the object being audited. This can be the key id, team id, user id, model id
+  before_value   Json?    // value of the row
+  updated_values Json?    // value of the row after change
+}
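Each row in this table mirrors the LiteLLM_AuditLogs pydantic object written by create_audit_log_for_update above. A hedged sketch of reading recent entries back through the generated Prisma Python client (the find_many/order accessors are assumed from the standard prisma-client-py API; the table_name filter value is a placeholder, since this diff never states what LitellmTableNames.KEY_TABLE_NAME resolves to):

```python
from datetime import datetime, timedelta, timezone


async def recent_key_audit_logs(prisma_client, hours: int = 24):
    # Fetch audit rows for key changes in the last `hours`, newest first.
    since = datetime.now(timezone.utc) - timedelta(hours=hours)
    return await prisma_client.db.litellm_auditlog.find_many(
        where={
            "table_name": "LiteLLM_VerificationToken",  # assumed KEY_TABLE_NAME value
            "updated_at": {"gte": since},
        },
        order={"updated_at": "desc"},
    )
```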
@@ -8,7 +8,8 @@ Requires:
 * `pip install boto3>=1.28.57`
 """

-import litellm, os
+import litellm
+import os
 from typing import Optional
 from litellm.proxy._types import KeyManagementSystem

@@ -38,3 +39,21 @@ def load_aws_secret_manager(use_aws_secret_manager: Optional[bool]):

     except Exception as e:
         raise e
+
+
+def load_aws_kms(use_aws_kms: Optional[bool]):
+    if use_aws_kms is None or use_aws_kms is False:
+        return
+    try:
+        import boto3
+
+        validate_environment()
+
+        # Create a Secrets Manager client
+        kms_client = boto3.client("kms", region_name=os.getenv("AWS_REGION_NAME"))
+
+        litellm.secret_manager_client = kms_client
+        litellm._key_management_system = KeyManagementSystem.AWS_KMS
+
+    except Exception as e:
+        raise e
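With load_aws_kms wired in, a secret such as the master key can be stored as a KMS-encrypted blob and decrypted at startup. A hedged sketch of what that decryption looks like with the boto3 client configured above; the environment-variable name and the base64 envelope are assumptions for illustration, not something this diff defines:

```python
import base64
import os

import boto3

# Same client construction as load_aws_kms above.
kms_client = boto3.client("kms", region_name=os.getenv("AWS_REGION_NAME"))


def decrypt_kms_secret(encrypted_b64: str) -> str:
    # Assumed envelope: the ciphertext is stored base64-encoded in an env var or config value.
    ciphertext = base64.b64decode(encrypted_b64)
    response = kms_client.decrypt(CiphertextBlob=ciphertext)
    return response["Plaintext"].decode("utf-8")


# Hypothetical usage:
# master_key = decrypt_kms_secret(os.environ["LITELLM_MASTER_KEY_ENCRYPTED"])
```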
@@ -2709,13 +2709,15 @@ def decrypt_value(value: bytes, master_key: str) -> str:


 # LiteLLM Admin UI - Non SSO Login
-html_form = """
+url_to_redirect_to = os.getenv("PROXY_BASE_URL", "")
+url_to_redirect_to += "/login"
+html_form = f"""
 <!DOCTYPE html>
 <html>
 <head>
     <title>LiteLLM Login</title>
     <style>
-        body {
+        body {{
             font-family: Arial, sans-serif;
             background-color: #f4f4f4;
             margin: 0;

@@ -2724,42 +2726,42 @@ html_form = """
             justify-content: center;
             align-items: center;
             height: 100vh;
-        }
+        }}

-        form {
+        form {{
             background-color: #fff;
             padding: 20px;
             border-radius: 8px;
             box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
-        }
+        }}

-        label {
+        label {{
             display: block;
             margin-bottom: 8px;
-        }
+        }}

-        input {
+        input {{
             width: 100%;
             padding: 8px;
             margin-bottom: 16px;
             box-sizing: border-box;
             border: 1px solid #ccc;
             border-radius: 4px;
-        }
+        }}

-        input[type="submit"] {
+        input[type="submit"] {{
             background-color: #4caf50;
             color: #fff;
             cursor: pointer;
-        }
+        }}

-        input[type="submit"]:hover {
+        input[type="submit"]:hover {{
             background-color: #45a049;
-        }
+        }}
     </style>
 </head>
 <body>
-    <form action="/login" method="post">
+    <form action="{url_to_redirect_to}" method="post">
         <h2>LiteLLM Login</h2>

         <p>By default Username is "admin" and Password is your set LiteLLM Proxy `MASTER_KEY`</p>
@ -2771,8 +2773,6 @@ html_form = """
|
||||||
<input type="password" id="password" name="password" required>
|
<input type="password" id="password" name="password" required>
|
||||||
<input type="submit" value="Submit">
|
<input type="submit" value="Submit">
|
||||||
</form>
|
</form>
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
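With this change the non-SSO login form posts to {PROXY_BASE_URL}/login instead of a hard-coded /login. A small sketch of how a deployment behind a custom domain might set this up; the URL is a placeholder.

import os

os.environ["PROXY_BASE_URL"] = "https://litellm.example.com"  # placeholder base URL

# The rendered login form will then submit to:
#   https://litellm.example.com/login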
@@ -2837,3 +2837,17 @@ missing_keys_html_form = """
 </body>
 </html>
 """
+
+
+def get_error_message_str(e: Exception) -> str:
+    error_message = ""
+    if isinstance(e, HTTPException):
+        if isinstance(e.detail, str):
+            error_message = e.detail
+        elif isinstance(e.detail, dict):
+            error_message = json.dumps(e.detail)
+        else:
+            error_message = str(e)
+    else:
+        error_message = str(e)
+    return error_message
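For illustration, what the new helper returns for the two common cases, assuming FastAPI's HTTPException:

from fastapi import HTTPException

# dict detail -> JSON string
get_error_message_str(HTTPException(status_code=400, detail={"error": "invalid key"}))
# -> '{"error": "invalid key"}'

# any other exception -> str(e)
get_error_message_str(ValueError("bad input"))
# -> 'bad input'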
@@ -220,8 +220,6 @@ class Router:
             []
         )  # names of models under litellm_params. ex. azure/chatgpt-v-2
         self.deployment_latency_map = {}
-        ### SCHEDULER ###
-        self.scheduler = Scheduler(polling_interval=polling_interval)
         ### CACHING ###
         cache_type: Literal["local", "redis"] = "local"  # default to an in-memory cache
         redis_cache = None
@@ -259,6 +257,10 @@ class Router:
             redis_cache=redis_cache, in_memory_cache=InMemoryCache()
         )  # use a dual cache (Redis+In-Memory) for tracking cooldowns, usage, etc.

+        ### SCHEDULER ###
+        self.scheduler = Scheduler(
+            polling_interval=polling_interval, redis_cache=redis_cache
+        )
         self.default_deployment = None  # use this to track the users default deployment, when they want to use model = *
         self.default_max_parallel_requests = default_max_parallel_requests

@@ -2096,8 +2098,8 @@ class Router:
            except Exception as e:
                raise e
        except Exception as e:
-            verbose_router_logger.debug(f"An exception occurred - {str(e)}")
-            traceback.print_exc()
+            verbose_router_logger.error(f"An exception occurred - {str(e)}")
+            verbose_router_logger.debug(traceback.format_exc())
            raise original_exception

    async def async_function_with_retries(self, *args, **kwargs):
@@ -4048,6 +4050,12 @@ class Router:
        for idx in reversed(invalid_model_indices):
            _returned_deployments.pop(idx)

+        ## ORDER FILTERING ## -> if user set 'order' in deployments, return deployments with lowest order (e.g. order=1 > order=2)
+        if len(_returned_deployments) > 0:
+            _returned_deployments = litellm.utils._get_order_filtered_deployments(
+                _returned_deployments
+            )
+
        return _returned_deployments

    def _common_checks_available_deployment(
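The new order filtering lets deployments inside one model group be ranked. A minimal sketch of a Router configured this way; model names and API keys are placeholders, and the test_router_order test added further down exercises exactly this behaviour. With pre-call checks enabled, the order=1 deployment is preferred while it is healthy.

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-4o", "api_key": "sk-primary", "order": 1},
        },
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-4o", "api_key": "sk-backup", "order": 2},
        },
    ],
    enable_pre_call_checks=True,
)

# Requests for "gpt-3.5-turbo" are routed to the order=1 deployment first.
response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)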
@@ -1,11 +1,9 @@
 #### What this does ####
 # picks based on response time (for streaming, this is time to first token)
-from pydantic import BaseModel, Extra, Field, root_validator
-import os, requests, random  # type: ignore
+from pydantic import BaseModel
 from typing import Optional, Union, List, Dict
 from datetime import datetime, timedelta
-import random
-
+from litellm import verbose_logger
 import traceback
 from litellm.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
@@ -119,7 +117,12 @@ class LowestCostLoggingHandler(CustomLogger):
            if self.test_flag:
                self.logged_success += 1
        except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
            pass

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@@ -201,7 +204,12 @@ class LowestCostLoggingHandler(CustomLogger):
            if self.test_flag:
                self.logged_success += 1
        except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
            pass

    async def async_get_available_deployments(
@@ -1,16 +1,16 @@
 #### What this does ####
 # picks based on response time (for streaming, this is time to first token)
-from pydantic import BaseModel, Extra, Field, root_validator  # type: ignore
-import dotenv, os, requests, random  # type: ignore
+from pydantic import BaseModel
+import random
 from typing import Optional, Union, List, Dict
 from datetime import datetime, timedelta
-import random
 import traceback
 from litellm.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm import ModelResponse
 from litellm import token_counter
 import litellm
+from litellm import verbose_logger


 class LiteLLMBase(BaseModel):
@@ -165,7 +165,12 @@ class LowestLatencyLoggingHandler(CustomLogger):
            if self.test_flag:
                self.logged_success += 1
        except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
            pass

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
@@ -229,7 +234,12 @@ class LowestLatencyLoggingHandler(CustomLogger):
                # do nothing if it's not a timeout error
                return
        except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
            pass

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@@ -352,7 +362,12 @@ class LowestLatencyLoggingHandler(CustomLogger):
            if self.test_flag:
                self.logged_success += 1
        except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.router_strategy.lowest_latency.py::async_log_success_event(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
            pass

    def get_available_deployments(
@@ -11,6 +11,7 @@ from litellm.integrations.custom_logger import CustomLogger
 from litellm._logging import verbose_router_logger
 from litellm.utils import print_verbose
+


 class LiteLLMBase(BaseModel):
     """
     Implements default functions, all pydantic objects should have.
@@ -23,8 +24,10 @@ class LiteLLMBase(BaseModel):
            # if using pydantic v1
            return self.dict()

+
 class RoutingArgs(LiteLLMBase):
     ttl: int = 1 * 60  # 1min (RPM/TPM expire key)

+
 class LowestTPMLoggingHandler(CustomLogger):
     test_flag: bool = False
@@ -32,7 +35,9 @@ class LowestTPMLoggingHandler(CustomLogger):
     logged_failure: int = 0
     default_cache_time_seconds: int = 1 * 60 * 60  # 1 hour

-    def __init__(self, router_cache: DualCache, model_list: list, routing_args: dict = {}):
+    def __init__(
+        self, router_cache: DualCache, model_list: list, routing_args: dict = {}
+    ):
        self.router_cache = router_cache
        self.model_list = model_list
        self.routing_args = RoutingArgs(**routing_args)
@@ -72,19 +77,28 @@ class LowestTPMLoggingHandler(CustomLogger):
            request_count_dict = self.router_cache.get_cache(key=tpm_key) or {}
            request_count_dict[id] = request_count_dict.get(id, 0) + total_tokens

-            self.router_cache.set_cache(key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl)
+            self.router_cache.set_cache(
+                key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl
+            )

            ## RPM
            request_count_dict = self.router_cache.get_cache(key=rpm_key) or {}
            request_count_dict[id] = request_count_dict.get(id, 0) + 1

-            self.router_cache.set_cache(key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl)
+            self.router_cache.set_cache(
+                key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl
+            )

            ### TESTING ###
            if self.test_flag:
                self.logged_success += 1
        except Exception as e:
-            traceback.print_exc()
+            verbose_router_logger.error(
+                "litellm.router_strategy.lowest_tpm_rpm.py::async_log_success_event(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_router_logger.debug(traceback.format_exc())
            pass

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@@ -123,19 +137,28 @@ class LowestTPMLoggingHandler(CustomLogger):
            request_count_dict = self.router_cache.get_cache(key=tpm_key) or {}
            request_count_dict[id] = request_count_dict.get(id, 0) + total_tokens

-            self.router_cache.set_cache(key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl)
+            self.router_cache.set_cache(
+                key=tpm_key, value=request_count_dict, ttl=self.routing_args.ttl
+            )

            ## RPM
            request_count_dict = self.router_cache.get_cache(key=rpm_key) or {}
            request_count_dict[id] = request_count_dict.get(id, 0) + 1

-            self.router_cache.set_cache(key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl)
+            self.router_cache.set_cache(
+                key=rpm_key, value=request_count_dict, ttl=self.routing_args.ttl
+            )

            ### TESTING ###
            if self.test_flag:
                self.logged_success += 1
        except Exception as e:
-            traceback.print_exc()
+            verbose_router_logger.error(
+                "litellm.router_strategy.lowest_tpm_rpm.py::async_log_success_event(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_router_logger.debug(traceback.format_exc())
            pass

    def get_available_deployments(
@@ -1,19 +1,19 @@
 #### What this does ####
 # identifies lowest tpm deployment
 from pydantic import BaseModel
-import dotenv, os, requests, random
+import random
 from typing import Optional, Union, List, Dict
-import datetime as datetime_og
-from datetime import datetime
-import traceback, asyncio, httpx
+import traceback
+import httpx
 import litellm
 from litellm import token_counter
 from litellm.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
-from litellm._logging import verbose_router_logger
+from litellm._logging import verbose_router_logger, verbose_logger
 from litellm.utils import print_verbose, get_utc_datetime
 from litellm.types.router import RouterErrors


 class LiteLLMBase(BaseModel):
     """
     Implements default functions, all pydantic objects should have.
@@ -22,12 +22,14 @@ class LiteLLMBase(BaseModel):
    def json(self, **kwargs):
        try:
            return self.model_dump()  # noqa
-        except:
+        except Exception as e:
            # if using pydantic v1
            return self.dict()

+
 class RoutingArgs(LiteLLMBase):
     ttl: int = 1 * 60  # 1min (RPM/TPM expire key)

+
 class LowestTPMLoggingHandler_v2(CustomLogger):
     """
@@ -47,7 +49,9 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
     logged_failure: int = 0
     default_cache_time_seconds: int = 1 * 60 * 60  # 1 hour

-    def __init__(self, router_cache: DualCache, model_list: list, routing_args: dict = {}):
+    def __init__(
+        self, router_cache: DualCache, model_list: list, routing_args: dict = {}
+    ):
        self.router_cache = router_cache
        self.model_list = model_list
        self.routing_args = RoutingArgs(**routing_args)
@@ -104,7 +108,9 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
                )
            else:
                # if local result below limit, check redis ## prevent unnecessary redis checks
-                result = self.router_cache.increment_cache(key=rpm_key, value=1, ttl=self.routing_args.ttl)
+                result = self.router_cache.increment_cache(
+                    key=rpm_key, value=1, ttl=self.routing_args.ttl
+                )
                if result is not None and result > deployment_rpm:
                    raise litellm.RateLimitError(
                        message="Deployment over defined rpm limit={}. current usage={}".format(
@@ -244,12 +250,19 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
            # update cache

            ## TPM
-            self.router_cache.increment_cache(key=tpm_key, value=total_tokens, ttl=self.routing_args.ttl)
+            self.router_cache.increment_cache(
+                key=tpm_key, value=total_tokens, ttl=self.routing_args.ttl
+            )
            ### TESTING ###
            if self.test_flag:
                self.logged_success += 1
        except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
            pass

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@@ -295,7 +308,12 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
            if self.test_flag:
                self.logged_success += 1
        except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
            pass

    def _common_checks_available_deployment(
@@ -1,13 +1,14 @@
-import heapq, time
+import heapq
 from pydantic import BaseModel
 from typing import Optional
 import enum
-from litellm.caching import DualCache
+from litellm.caching import DualCache, RedisCache
 from litellm import print_verbose


 class SchedulerCacheKeys(enum.Enum):
     queue = "scheduler:queue"
+    default_in_memory_ttl = 5  # cache queue in-memory for 5s when redis cache available


 class DefaultPriorities(enum.Enum):
@@ -25,18 +26,24 @@ class FlowItem(BaseModel):
 class Scheduler:
     cache: DualCache

-    def __init__(self, polling_interval: Optional[float] = None):
+    def __init__(
+        self,
+        polling_interval: Optional[float] = None,
+        redis_cache: Optional[RedisCache] = None,
+    ):
        """
        polling_interval: float or null - frequency of polling queue. Default is 3ms.
        """
        self.queue: list = []
-        self.cache = DualCache()
+        default_in_memory_ttl: Optional[float] = None
+        if redis_cache is not None:
+            # if redis-cache available frequently poll that instead of using in-memory.
+            default_in_memory_ttl = SchedulerCacheKeys.default_in_memory_ttl.value
+        self.cache = DualCache(
+            redis_cache=redis_cache, default_in_memory_ttl=default_in_memory_ttl
+        )
        self.polling_interval = polling_interval or 0.03  # default to 3ms

-    def update_variables(self, cache: Optional[DualCache] = None):
-        if cache is not None:
-            self.cache = cache
-
    async def add_request(self, request: FlowItem):
        # We use the priority directly, as lower values indicate higher priority
        # get the queue
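A small sketch of the new constructor in use, assuming a Redis instance is available; the import path and the connection details are assumptions. With redis_cache set, the scheduler queue lives in the shared DualCache and is only held in memory for the 5-second TTL defined above.

from litellm.caching import RedisCache
from litellm.scheduler import Scheduler  # assumed import path

redis_cache = RedisCache(host="localhost", port=6379)  # placeholder connection details
scheduler = Scheduler(polling_interval=0.03, redis_cache=redis_cache)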
@@ -198,7 +198,11 @@ async def test_aarun_thread_litellm(sync_mode, provider, is_streaming):
            )
            assert isinstance(messages.data[0], Message)
        else:
-            pytest.fail("An unexpected error occurred when running the thread")
+            pytest.fail(
+                "An unexpected error occurred when running the thread, {}".format(
+                    run
+                )
+            )

    else:
        added_message = await litellm.a_add_message(**data)
@@ -226,4 +230,8 @@ async def test_aarun_thread_litellm(sync_mode, provider, is_streaming):
            )
            assert isinstance(messages.data[0], Message)
        else:
-            pytest.fail("An unexpected error occurred when running the thread")
+            pytest.fail(
+                "An unexpected error occurred when running the thread, {}".format(
+                    run
+                )
+            )
@@ -2169,6 +2169,7 @@ def test_completion_azure_key_completion_arg():
        logprobs=True,
        max_tokens=10,
    )
+
    print(f"response: {response}")

    print("Hidden Params", response._hidden_params)
@@ -2544,6 +2545,8 @@ def test_replicate_custom_prompt_dict():
                "content": "what is yc write 1 paragraph",
            }
        ],
+        mock_response="Hello world",
+        mock_response="hello world",
        repetition_penalty=0.1,
        num_retries=3,
    )
@@ -76,7 +76,7 @@ def test_image_generation_azure_dall_e_3():
        )
        print(f"response: {response}")
        assert len(response.data) > 0
-    except litellm.RateLimitError as e:
+    except litellm.InternalServerError as e:
        pass
    except litellm.ContentPolicyViolationError:
        pass  # OpenAI randomly raises these errors - skip when they occur
@@ -102,18 +102,18 @@ async def test_get_available_deployments_custom_price():
 @pytest.mark.asyncio
 async def test_lowest_cost_routing():
     """
-    Test if router returns model with the lowest cost
+    Test if router, returns model with the lowest cost
     """
     model_list = [
         {
-            "model_name": "gpt-3.5-turbo",
+            "model_name": "gpt-4",
             "litellm_params": {"model": "gpt-4"},
             "model_info": {"id": "openai-gpt-4"},
         },
         {
             "model_name": "gpt-3.5-turbo",
-            "litellm_params": {"model": "groq/llama3-8b-8192"},
-            "model_info": {"id": "groq-llama"},
+            "litellm_params": {"model": "gpt-3.5-turbo"},
+            "model_info": {"id": "gpt-3.5-turbo"},
         },
     ]

@@ -127,7 +127,7 @@ async def test_lowest_cost_routing():
    print(
        response._hidden_params["model_id"]
    )  # expect groq-llama, since groq/llama has lowest cost
-    assert "groq-llama" == response._hidden_params["model_id"]
+    assert "gpt-3.5-turbo" == response._hidden_params["model_id"]


 async def _deploy(lowest_cost_logger, deployment_id, tokens_used, duration):
@@ -38,6 +38,48 @@ def test_router_sensitive_keys():
        assert "special-key" not in str(e)


+def test_router_order():
+    """
+    Asserts for 2 models in a model group, model with order=1 always called first
+    """
+    router = Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_key": os.getenv("OPENAI_API_KEY"),
+                    "mock_response": "Hello world",
+                    "order": 1,
+                },
+                "model_info": {"id": "1"},
+            },
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_key": "bad-key",
+                    "mock_response": Exception("this is a bad key"),
+                    "order": 2,
+                },
+                "model_info": {"id": "2"},
+            },
+        ],
+        num_retries=0,
+        allowed_fails=0,
+        enable_pre_call_checks=True,
+    )
+
+    for _ in range(100):
+        response = router.completion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "Hey, how's it going?"}],
+        )
+
+        assert isinstance(response, litellm.ModelResponse)
+        assert response._hidden_params["model_id"] == "1"
+
+
 @pytest.mark.parametrize("num_retries", [None, 2])
 @pytest.mark.parametrize("max_retries", [None, 4])
 def test_router_num_retries_init(num_retries, max_retries):
@@ -186,3 +186,13 @@ def test_load_test_token_counter(model):
    total_time = end_time - start_time
    print("model={}, total test time={}".format(model, total_time))
    assert total_time < 10, f"Total encoding time > 10s, {total_time}"
+
+
+def test_openai_token_with_image_and_text():
+    model = "gpt-4o"
+    full_request = {'model': 'gpt-4o', 'tools': [{'type': 'function', 'function': {'name': 'json', 'parameters': {'type': 'object', 'required': ['clause'], 'properties': {'clause': {'type': 'string'}}}, 'description': 'Respond with a JSON object.'}}], 'logprobs': False, 'messages': [{'role': 'user', 'content': [{'text': '\n Just some long text, long long text, and you know it will be longer than 7 tokens definetly.', 'type': 'text'}]}], 'tool_choice': {'type': 'function', 'function': {'name': 'json'}}, 'exclude_models': [], 'disable_fallback': False, 'exclude_providers': []}
+    messages = full_request.get("messages", [])
+
+    token_count = token_counter(model=model, messages=messages)
+    print(token_count)
+
+
+test_openai_token_with_image_and_text()
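The companion change in openai_token_counter (further down, in litellm/utils.py) is what this test exercises: text parts inside a content list are now encoded and counted. A minimal sketch of the behaviour, with placeholder message text:

from litellm import token_counter

messages = [
    {
        "role": "user",
        "content": [{"type": "text", "text": "Just some long text that should be counted."}],
    }
]

# With the added encoding.encode(...) call, text inside a content list
# now contributes to the token count for models like gpt-4o.
print(token_counter(model="gpt-4o", messages=messages))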
litellm/utils.py (110 changes)
@@ -1374,8 +1374,12 @@ class Logging:
                        callback_func=callback,
                    )
            except Exception as e:
-                traceback.print_exc()
-                print_verbose(
+                verbose_logger.error(
+                    "litellm.Logging.pre_call(): Exception occured - {}".format(
+                        str(e)
+                    )
+                )
+                verbose_logger.debug(
                    f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while input logging with integrations {traceback.format_exc()}"
                )
                print_verbose(
@@ -4062,6 +4066,7 @@ def openai_token_counter(
            for c in value:
                if c["type"] == "text":
                    text += c["text"]
+                    num_tokens += len(encoding.encode(c["text"], disallowed_special=()))
                elif c["type"] == "image_url":
                    if isinstance(c["image_url"], dict):
                        image_url_dict = c["image_url"]
@@ -6194,6 +6199,27 @@ def calculate_max_parallel_requests(
    return None


+def _get_order_filtered_deployments(healthy_deployments: List[Dict]) -> List:
+    min_order = min(
+        (
+            deployment["litellm_params"]["order"]
+            for deployment in healthy_deployments
+            if "order" in deployment["litellm_params"]
+        ),
+        default=None,
+    )
+
+    if min_order is not None:
+        filtered_deployments = [
+            deployment
+            for deployment in healthy_deployments
+            if deployment["litellm_params"].get("order") == min_order
+        ]
+
+        return filtered_deployments
+    return healthy_deployments
+
+
 def _get_model_region(
     custom_llm_provider: str, litellm_params: LiteLLM_Params
 ) -> Optional[str]:
@@ -7336,6 +7362,10 @@ def get_provider_fields(custom_llm_provider: str) -> List[ProviderField]:

    if custom_llm_provider == "databricks":
        return litellm.DatabricksConfig().get_required_params()
+
+    elif custom_llm_provider == "ollama":
+        return litellm.OllamaConfig().get_required_params()
+
    else:
        return []

@@ -9782,8 +9812,7 @@ def exception_type(
        elif custom_llm_provider == "azure":
            if "Internal server error" in error_str:
                exception_mapping_worked = True
-                raise APIError(
-                    status_code=500,
+                raise litellm.InternalServerError(
                    message=f"AzureException Internal server error - {original_exception.message}",
                    llm_provider="azure",
                    model=model,
@@ -10033,6 +10062,8 @@ def get_secret(
 ):
    key_management_system = litellm._key_management_system
    key_management_settings = litellm._key_management_settings
+    args = locals()
+
    if secret_name.startswith("os.environ/"):
        secret_name = secret_name.replace("os.environ/", "")

@@ -10120,13 +10151,13 @@ def get_secret(
                key_manager = "local"

            if (
-                key_manager == KeyManagementSystem.AZURE_KEY_VAULT
+                key_manager == KeyManagementSystem.AZURE_KEY_VAULT.value
                or type(client).__module__ + "." + type(client).__name__
                == "azure.keyvault.secrets._client.SecretClient"
            ):  # support Azure Secret Client - from azure.keyvault.secrets import SecretClient
                secret = client.get_secret(secret_name).value
            elif (
-                key_manager == KeyManagementSystem.GOOGLE_KMS
+                key_manager == KeyManagementSystem.GOOGLE_KMS.value
                or client.__class__.__name__ == "KeyManagementServiceClient"
            ):
                encrypted_secret: Any = os.getenv(secret_name)
@@ -10154,6 +10185,25 @@ def get_secret(
                secret = response.plaintext.decode(
                    "utf-8"
                )  # assumes the original value was encoded with utf-8
+            elif key_manager == KeyManagementSystem.AWS_KMS.value:
+                """
+                Only check the tokens which start with 'aws_kms/'. This prevents latency impact caused by checking all keys.
+                """
+                encrypted_value = os.getenv(secret_name, None)
+                if encrypted_value is None:
+                    raise Exception("encrypted value for AWS KMS cannot be None.")
+                # Decode the base64 encoded ciphertext
+                ciphertext_blob = base64.b64decode(encrypted_value)
+
+                # Set up the parameters for the decrypt call
+                params = {"CiphertextBlob": ciphertext_blob}
+
+                # Perform the decryption
+                response = client.decrypt(**params)
+
+                # Extract and decode the plaintext
+                plaintext = response["Plaintext"]
+                secret = plaintext.decode("utf-8")
            elif key_manager == KeyManagementSystem.AWS_SECRET_MANAGER.value:
                try:
                    get_secret_value_response = client.get_secret_value(
@@ -10174,10 +10224,14 @@ def get_secret(
                    for k, v in secret_dict.items():
                        secret = v
                    print_verbose(f"secret: {secret}")
+            elif key_manager == "local":
+                secret = os.getenv(secret_name)
            else:  # assume the default is infisicial client
                secret = client.get_secret(secret_name).secret_value
        except Exception as e:  # check if it's in os.environ
-            print_verbose(f"An exception occurred - {str(e)}")
+            verbose_logger.error(
+                f"An exception occurred - {str(e)}\n\n{traceback.format_exc()}"
+            )
            secret = os.getenv(secret_name)
        try:
            secret_value_as_bool = ast.literal_eval(secret)
@@ -10511,7 +10565,12 @@ class CustomStreamWrapper:
                "finish_reason": finish_reason,
            }
        except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.CustomStreamWrapper.handle_predibase_chunk(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
            raise e

    def handle_huggingface_chunk(self, chunk):
@@ -10555,7 +10614,12 @@ class CustomStreamWrapper:
                "finish_reason": finish_reason,
            }
        except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.CustomStreamWrapper.handle_huggingface_chunk(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
            raise e

    def handle_ai21_chunk(self, chunk):  # fake streaming
@@ -10790,7 +10854,12 @@ class CustomStreamWrapper:
                "usage": usage,
            }
        except Exception as e:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.CustomStreamWrapper.handle_openai_chat_completion_chunk(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
            raise e

    def handle_azure_text_completion_chunk(self, chunk):
@@ -10871,7 +10940,12 @@ class CustomStreamWrapper:
            else:
                return ""
        except:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.CustomStreamWrapper.handle_baseten_chunk(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
            return ""

    def handle_cloudlfare_stream(self, chunk):
@@ -11070,7 +11144,12 @@ class CustomStreamWrapper:
                "is_finished": True,
            }
        except:
-            traceback.print_exc()
+            verbose_logger.error(
+                "litellm.CustomStreamWrapper.handle_clarifai_chunk(): Exception occured - {}".format(
+                    str(e)
+                )
+            )
+            verbose_logger.debug(traceback.format_exc())
            return ""

    def model_response_creator(self):
@@ -11557,7 +11636,12 @@ class CustomStreamWrapper:
                                tool["type"] = "function"
                    model_response.choices[0].delta = Delta(**_json_delta)
                except Exception as e:
-                    traceback.print_exc()
+                    verbose_logger.error(
+                        "litellm.CustomStreamWrapper.chunk_creator(): Exception occured - {}".format(
+                            str(e)
+                        )
+                    )
+                    verbose_logger.debug(traceback.format_exc())
                    model_response.choices[0].delta = Delta()
                else:
                    try:
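Putting the two AWS KMS pieces together (the load_aws_kms helper earlier and this new get_secret branch), a hedged end-to-end sketch of how a secret could be provisioned. The key alias, env var name, and secret value are placeholders; only the boto3 encrypt/decrypt calls and the base64 encoding mirror what the change actually does.

import base64
import os

import boto3

kms = boto3.client("kms", region_name=os.getenv("AWS_REGION_NAME", "us-west-2"))

# Encrypt a value once, offline, and store the base64 ciphertext as an env var.
ciphertext = kms.encrypt(
    KeyId="alias/litellm-secrets",   # placeholder KMS key alias
    Plaintext=b"sk-my-master-key",   # placeholder secret value
)["CiphertextBlob"]
os.environ["LITELLM_MASTER_KEY"] = base64.b64encode(ciphertext).decode("utf-8")

# At runtime, with load_aws_kms(use_aws_kms=True) already called, get_secret()
# base64-decodes the env var and calls client.decrypt() to recover the plaintext.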
poetry.lock (generated, 14 changes)

@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.

 [[package]]
 name = "aiohttp"
@@ -2114,6 +2114,7 @@ files = [
    {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
    {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
    {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
+    {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
    {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
    {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
    {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
@@ -2121,8 +2122,15 @@ files = [
    {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
    {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
    {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
+    {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
    {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
    {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
+    {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
+    {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
+    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
+    {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
+    {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
+    {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
    {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
    {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
    {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
@@ -2139,6 +2147,7 @@ files = [
    {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
    {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
    {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
+    {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
    {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
    {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
    {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
@@ -2146,6 +2155,7 @@ files = [
    {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
    {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
    {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
+    {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
    {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
    {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
    {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
@@ -3140,4 +3150,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<4.0, !=3.9.7"
-content-hash = "a54d969a1a707413e7cd3ce869d14ef73dd41bb9d36ebf0fb878d9e929bc15b3"
+content-hash = "6a37992b63b11d254f5f40687bd96898b1d9515728f663f30dcc81c4ef8df7b7"
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.40.3"
+version = "1.40.5"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -62,7 +62,8 @@ extra_proxy = [
    "azure-identity",
    "azure-keyvault-secrets",
    "google-cloud-kms",
-    "resend"
+    "resend",
+    "pynacl"
 ]

 [tool.poetry.scripts]
@@ -79,7 +80,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"

 [tool.commitizen]
-version = "1.40.3"
+version = "1.40.5"
 version_files = [
    "pyproject.toml:^version"
 ]
@@ -244,3 +244,15 @@ model LiteLLM_InvitationLink {
   liteLLM_user_table_created  LiteLLM_UserTable @relation("CreatedBy", fields: [created_by], references: [user_id])
   liteLLM_user_table_updated  LiteLLM_UserTable @relation("UpdatedBy", fields: [updated_by], references: [user_id])
 }
+
+
+model LiteLLM_AuditLog {
+  id             String   @id @default(uuid())
+  updated_at     DateTime @default(now())
+  changed_by     String // user or system that performed the action
+  action         String // create, update, delete
+  table_name     String // one of LitellmTableNames.TEAM_TABLE_NAME, LitellmTableNames.USER_TABLE_NAME, LitellmTableNames.PROXY_MODEL_TABLE_NAME
+  object_id      String // id of the object being audited. This can be the key id, team id, user id, model id
+  before_value   Json?  // value of the row before the change
+  updated_values Json?  // value of the row after the change
+}
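For illustration, an audit record written against this schema might look roughly like the following; all values are hypothetical.

audit_entry = {
    "id": "c0ffee00-1234-4d2a-9e0e-000000000000",  # uuid, hypothetical
    "updated_at": "2024-06-07T12:00:00Z",
    "changed_by": "admin",                          # user or system that performed the action
    "action": "update",                             # create / update / delete
    "table_name": "LiteLLM_TeamTable",              # placeholder table name
    "object_id": "team-1234",                       # id of the object being audited
    "before_value": {"max_budget": 50},
    "updated_values": {"max_budget": 100},
}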
@@ -145,6 +145,7 @@ enum Providers {
   OpenAI_Compatible = "OpenAI-Compatible Endpoints (Groq, Together AI, Mistral AI, etc.)",
   Vertex_AI = "Vertex AI (Anthropic, Gemini, etc.)",
   Databricks = "Databricks",
+  Ollama = "Ollama",
 }

 const provider_map: Record<string, string> = {
@@ -156,6 +157,7 @@ const provider_map: Record<string, string> = {
   OpenAI_Compatible: "openai",
   Vertex_AI: "vertex_ai",
   Databricks: "databricks",
+  Ollama: "ollama",
 };

 const retry_policy_map: Record<string, string> = {
@@ -1747,6 +1749,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
                )}
                {selectedProvider != Providers.Bedrock &&
                  selectedProvider != Providers.Vertex_AI &&
+                  selectedProvider != Providers.Ollama &&
                  (dynamicProviderForm === undefined ||
                    dynamicProviderForm.fields.length == 0) && (
                    <Form.Item