Merge branch 'BerriAI:main' into main

Sha Ahammed Roze authored on 2024-06-06 20:50:22 +05:30, committed by GitHub
commit 53ccc45978
14 changed files with 186 additions and 137 deletions


@@ -225,37 +225,37 @@ curl 'http://0.0.0.0:4000/key/generate' \
## Supported Providers ([Docs](https://docs.litellm.ai/docs/providers))
| Provider | [Completion](https://docs.litellm.ai/docs/#basic-usage) | [Streaming](https://docs.litellm.ai/docs/completion/stream#streaming-responses) | [Async Completion](https://docs.litellm.ai/docs/completion/stream#async-completion) | [Async Streaming](https://docs.litellm.ai/docs/completion/stream#async-streaming) | [Async Embedding](https://docs.litellm.ai/docs/embedding/supported_embedding) | [Async Image Generation](https://docs.litellm.ai/docs/image_generation) |
|---------------------------------------------------------------------------------------|------------|-----------|------------------|-----------------|-----------------|------------------------|
| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [google - vertex_ai](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ | | |
| [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | | | |
| [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ | | |
| [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ | | |
| [huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ | | |
| [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | ✅ | | |
| [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | ✅ | | |
| [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ | | |
| [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ | | |
| [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ | | |
| [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ | | |
| [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ | | |
| [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ | | |
| [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ | | |
| [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ | | |
| [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ | | |
| [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ | | |
| [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ | | |
| [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ | |
| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ | |
| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ | |
[**Read the Docs**](https://docs.litellm.ai/docs/)
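
All rows above share one calling convention. A minimal sketch of the sync and async paths the table's columns refer to, assuming `OPENAI_API_KEY` is set in your environment:

```python
import asyncio
import os

from litellm import acompletion, completion

os.environ["OPENAI_API_KEY"] = "your-openai-key"  # placeholder

messages = [{"role": "user", "content": "Hello, how are you?"}]

# Completion (sync)
response = completion(model="gpt-3.5-turbo", messages=messages)
print(response.choices[0].message.content)

# Async Completion
async def main():
    response = await acompletion(model="gpt-3.5-turbo", messages=messages)
    print(response.choices[0].message.content)

asyncio.run(main())
```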


@@ -144,6 +144,26 @@ print(response)
```
You can also pass `metadata` via the request headers, using a `langfuse_*` prefix:
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--header 'langfuse_trace_id: trace-id22' \
--header 'langfuse_trace_user_id: user-id2' \
--header 'langfuse_trace_metadata: {"key":"value"}' \
--data '{
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "user",
"content": "what llm are you"
}
]
}'
```
### Trace & Generation Parameters
#### Trace Specific Parameters


@@ -47,7 +47,7 @@ for chunk in response:
We support ALL Groq models; just set `groq/` as a prefix when sending completion requests
| Model Name | Function Call |
|--------------------|---------------------------------------------------------|
| llama3-8b-8192 | `completion(model="groq/llama3-8b-8192", messages)` |
| llama3-70b-8192 | `completion(model="groq/llama3-70b-8192", messages)` |
| llama2-70b-4096 | `completion(model="groq/llama2-70b-4096", messages)` |
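
A runnable sketch of the first row, assuming `GROQ_API_KEY` is set in your environment:

```python
import os

from litellm import completion

os.environ["GROQ_API_KEY"] = "your-groq-key"  # placeholder

response = completion(
    model="groq/llama3-8b-8192",
    messages=[{"role": "user", "content": "Why is low latency important for LLMs?"}],
)
print(response.choices[0].message.content)
```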


@@ -27,12 +27,12 @@ Example TogetherAI Usage - Note: liteLLM supports all models deployed on Togethe
### Llama LLMs - Chat
| Model Name | Function Call | Required OS Variables |
|-----------------------------------|-------------------------------------------------------------------------|------------------------------------|
| togethercomputer/llama-2-70b-chat | `completion('together_ai/togethercomputer/llama-2-70b-chat', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Llama LLMs - Language / Instruct
| Model Name | Function Call | Required OS Variables |
|------------------------------------------|--------------------------------------------------------------------------------|------------------------------------|
| togethercomputer/llama-2-70b | `completion('together_ai/togethercomputer/llama-2-70b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/LLaMA-2-7B-32K | `completion('together_ai/togethercomputer/LLaMA-2-7B-32K', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/Llama-2-7B-32K-Instruct | `completion('together_ai/togethercomputer/Llama-2-7B-32K-Instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
@@ -40,23 +40,23 @@ Example TogetherAI Usage - Note: liteLLM supports all models deployed on Togethe
### Falcon LLMs
| Model Name | Function Call | Required OS Variables |
|--------------------------------------|----------------------------------------------------------------------------|------------------------------------|
| togethercomputer/falcon-40b-instruct | `completion('together_ai/togethercomputer/falcon-40b-instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/falcon-7b-instruct | `completion('together_ai/togethercomputer/falcon-7b-instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Alpaca LLMs
| Model Name | Function Call | Required OS Variables |
|----------------------------|------------------------------------------------------------------|------------------------------------|
| togethercomputer/alpaca-7b | `completion('together_ai/togethercomputer/alpaca-7b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Other Chat LLMs
| Model Name | Function Call | Required OS Variables |
|------------------------------|--------------------------------------------------------------------|------------------------------------|
| HuggingFaceH4/starchat-alpha | `completion('together_ai/HuggingFaceH4/starchat-alpha', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
### Code LLMs
| Model Name | Function Call | Required OS Variables |
|-----------------------------------------|-------------------------------------------------------------------------------|------------------------------------|
| togethercomputer/CodeLlama-34b | `completion('together_ai/togethercomputer/CodeLlama-34b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/CodeLlama-34b-Instruct | `completion('together_ai/togethercomputer/CodeLlama-34b-Instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| togethercomputer/CodeLlama-34b-Python | `completion('together_ai/togethercomputer/CodeLlama-34b-Python', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
@@ -67,7 +67,7 @@ Example TogetherAI Usage - Note: liteLLM supports all models deployed on Togethe
### Language LLMs
| Model Name | Function Call | Required OS Variables |
|-------------------------------------|---------------------------------------------------------------------------|------------------------------------|
| NousResearch/Nous-Hermes-Llama2-13b | `completion('together_ai/NousResearch/Nous-Hermes-Llama2-13b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| Austism/chronos-hermes-13b | `completion('together_ai/Austism/chronos-hermes-13b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
| upstage/SOLAR-0-70b-16bit | `completion('together_ai/upstage/SOLAR-0-70b-16bit', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
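
Every row follows the same pattern; a minimal sketch for the chat model above, assuming `TOGETHERAI_API_KEY` is set:

```python
import os

from litellm import completion

os.environ["TOGETHERAI_API_KEY"] = "your-together-key"  # placeholder

response = completion(
    model="together_ai/togethercomputer/llama-2-70b-chat",
    messages=[{"role": "user", "content": "Hello, world"}],
)
print(response.choices[0].message.content)
```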


@@ -156,8 +156,8 @@ def default_pt(messages):
#### Models we already have Prompt Templates for
| Model Name | Works for Models | Function Call |
|--------------------------------------|-----------------------------------|------------------------------------------------------------------------------------------------------------------|
| meta-llama/Llama-2-7b-chat | All meta-llama llama2 chat models | `completion(model='vllm/meta-llama/Llama-2-7b', messages=messages, api_base="your_api_endpoint")` |
| tiiuae/falcon-7b-instruct | All falcon instruct models | `completion(model='vllm/tiiuae/falcon-7b-instruct', messages=messages, api_base="your_api_endpoint")` |
| mosaicml/mpt-7b-chat | All mpt chat models | `completion(model='vllm/mosaicml/mpt-7b-chat', messages=messages, api_base="your_api_endpoint")` |
| codellama/CodeLlama-34b-Instruct-hf | All codellama instruct models | `completion(model='vllm/codellama/CodeLlama-34b-Instruct-hf', messages=messages, api_base="your_api_endpoint")` |
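
A runnable version of the calls in this table; the endpoint below is a hypothetical placeholder for wherever your vLLM server is hosted:

```python
from litellm import completion

response = completion(
    model="vllm/meta-llama/Llama-2-7b",
    messages=[{"role": "user", "content": "Hello, world"}],
    api_base="https://my-vllm-host:8000",  # hypothetical endpoint
)
print(response.choices[0].message.content)
```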


@@ -252,7 +252,7 @@ response = completion(
Here are some examples of models available in IBM watsonx.ai that you can use with LiteLLM:
| Model Name | Command |
|------------------------------------|------------------------------------------------------------------------------------------|
| Flan T5 XXL | `completion(model=watsonx/google/flan-t5-xxl, messages=messages)` |
| Flan Ul2 | `completion(model=watsonx/google/flan-ul2, messages=messages)` |
| Mt0 XXL | `completion(model=watsonx/bigscience/mt0-xxl, messages=messages)` |
@@ -276,7 +276,7 @@ For a list of all available models in watsonx.ai, see [here](https://dataplatfor
## Supported IBM watsonx.ai Embedding Models
| Model Name | Function Call |
|------------|------------------------------------------------------------------------|
| Slate 30m | `embedding(model="watsonx/ibm/slate-30m-english-rtrvr", input=input)` |
| Slate 125m | `embedding(model="watsonx/ibm/slate-125m-english-rtrvr", input=input)` |
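
A minimal sketch of the embedding call, assuming your watsonx credentials are already exported (commonly `WATSONX_URL`, `WATSONX_APIKEY`, and `WATSONX_PROJECT_ID`; check the watsonx provider docs for the exact names):

```python
from litellm import embedding

# assumes WATSONX_URL / WATSONX_APIKEY / WATSONX_PROJECT_ID are set in the environment
response = embedding(
    model="watsonx/ibm/slate-30m-english-rtrvr",
    input=["good morning from litellm"],
)
print(len(response.data[0]["embedding"]))  # embedding dimension
```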


@@ -38,7 +38,7 @@ print(response)
All models listed here https://inference.readthedocs.io/en/latest/models/builtin/embedding/index.html are supported
| Model Name | Function Call |
|-----------------------------|--------------------------------------------------------------------|
| bge-base-en | `embedding(model="xinference/bge-base-en", input)` |
| bge-base-en-v1.5 | `embedding(model="xinference/bge-base-en-v1.5", input)` |
| bge-base-zh | `embedding(model="xinference/bge-base-zh", input)` |
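
A minimal usage sketch, assuming a local Xinference server on its default endpoint:

```python
from litellm import embedding

response = embedding(
    model="xinference/bge-base-en",
    api_base="http://127.0.0.1:9997/v1",  # assumed default local Xinference endpoint
    input=["good morning from litellm"],
)
print(len(response.data[0]["embedding"]))
```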


@@ -260,7 +260,7 @@ Requirements:
<TabItem value="docker-deploy" label="Dockerfile">
We maintain a [separate Dockerfile](https://github.com/BerriAI/litellm/pkgs/container/litellm-database) for reducing build time when running LiteLLM proxy with a connected Postgres Database
```shell
docker pull ghcr.io/berriai/litellm-database:main-latest
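# assumed follow-up, not shown in this hunk: run the image with a Postgres
# connection string (placeholder values) and expose the proxy port
docker run -e DATABASE_URL=postgresql://user:password@host:5432/dbname \
    -p 4000:4000 ghcr.io/berriai/litellm-database:main-latest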


@@ -459,7 +459,7 @@ Step 1. Set a `LAKERA_API_KEY` in your env
LAKERA_API_KEY="7a91a1a6059da*******"
```
Step 2. Add `lakera_prompt_injection` to your callbacks
```yaml
litellm_settings:
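  callbacks: ["lakera_prompt_injection"]  # assumed continuation; the hunk truncates here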


@@ -1,8 +1,8 @@
# Using Fine-Tuned gpt-3.5-turbo
LiteLLM allows you to call `completion` with your fine-tuned gpt-3.5-turbo models
If you're trying to create your custom fine-tuned gpt-3.5-turbo model, follow this tutorial: https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset
Once you've created your fine-tuned model, you can call it with `litellm.completion()`
## Usage
```python
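# assumed continuation (the hunk truncates here); the model id below is a
# hypothetical example of the "ft:" name OpenAI returns for a fine-tune
import os
from litellm import completion

os.environ["OPENAI_API_KEY"] = "your-openai-key"  # placeholder

response = completion(
    model="ft:gpt-3.5-turbo:my-org:custom_suffix:id",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(response.choices[0].message.content)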


@@ -69,6 +69,28 @@ class LangFuseLogger:
        else:
            self.upstream_langfuse = None
@staticmethod
def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict:
"""
Adds metadata from proxy request headers to Langfuse logging if keys start with "langfuse_"
and overwrites litellm_params.metadata if already included.
For example if you want to append your trace to an existing `trace_id` via header, send
`headers: { ..., langfuse_existing_trace_id: your-existing-trace-id }` via proxy request.
"""
proxy_headers = litellm_params.get("proxy_server_request", {}).get("headers", {})
for metadata_param_key in proxy_headers:
if metadata_param_key.startswith("langfuse_"):
trace_param_key = metadata_param_key.replace("langfuse_", "", 1)
if trace_param_key in metadata:
verbose_logger.warning(f"Overwriting Langfuse `{trace_param_key}` from request header")
else:
verbose_logger.debug(f"Found Langfuse `{trace_param_key}` in request header")
metadata[trace_param_key] = proxy_headers.get(metadata_param_key)
return metadata
    # def log_error(kwargs, response_obj, start_time, end_time):
    #     generation = trace.generation(
    #         level ="ERROR" # can be any of DEBUG, DEFAULT, WARNING or ERROR
@@ -97,6 +119,7 @@ class LangFuseLogger:
        metadata = (
            litellm_params.get("metadata", {}) or {}
        )  # if litellm_params['metadata'] == None
        metadata = self.add_metadata_from_header(litellm_params, metadata)
        optional_params = copy.deepcopy(kwargs.get("optional_params", {}))
        prompt = {"messages": kwargs.get("messages")}


@@ -833,7 +833,7 @@ def anthropic_messages_pt_xml(messages: list):
            )  # either string or none
            if messages[msg_i].get(
                "tool_calls", []
            ):  # support assistant tool invoke conversion
                assistant_text += convert_to_anthropic_tool_invoke_xml(  # type: ignore
                    messages[msg_i]["tool_calls"]
                )
@@ -1224,7 +1224,7 @@ def anthropic_messages_pt(messages: list):
            if messages[msg_i].get(
                "tool_calls", []
            ):  # support assistant tool invoke conversion
                assistant_content.extend(
                    convert_to_anthropic_tool_invoke(messages[msg_i]["tool_calls"])
                )


@@ -297,24 +297,29 @@ def _convert_gemini_role(role: str) -> Literal["user", "model"]:
def _process_gemini_image(image_url: str) -> PartType:
    try:
        if ".mp4" in image_url and "gs://" in image_url:
            # Case 1: Videos with Cloud Storage URIs
            part_mime = "video/mp4"
            _file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
            return PartType(file_data=_file_data)
        elif ".pdf" in image_url and "gs://" in image_url:
            # Case 2: PDFs with Cloud Storage URIs
            part_mime = "application/pdf"
            _file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
            return PartType(file_data=_file_data)
        elif "gs://" in image_url:
            # Case 3: Images with Cloud Storage URIs
            # The supported MIME types for images include image/png and image/jpeg.
            part_mime = "image/png" if "png" in image_url else "image/jpeg"
            _file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
            return PartType(file_data=_file_data)
        elif "https:/" in image_url:
            # Case 4: Images with direct links
            image = _load_image_from_url(image_url)
            _blob = BlobType(data=image.data, mime_type=image._mime_type)
            return PartType(inline_data=_blob)
        elif "base64" in image_url:
            # Case 5: Images with base64 encoding
            import base64, re

            # base 64 is passed as data:image/jpeg;base64,<base-64-encoded-image>
@@ -390,7 +395,7 @@ def _gemini_convert_messages_with_history(messages: list) -> List[ContentType]:
            assistant_content.extend(_parts)
        elif messages[msg_i].get(
            "tool_calls", []
        ):  # support assistant tool invoke conversion
            assistant_content.extend(
                convert_to_gemini_tool_call_invoke(messages[msg_i]["tool_calls"])
            )
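
The reordering in the first hunk is the substance of this change: the specific `gs://` checks for `.mp4` and `.pdf` now run before the generic `gs://` image branch that previously shadowed them. A standalone sketch of the resulting dispatch, using a hypothetical helper that mirrors the branches:

```python
def classify_gemini_part(image_url: str) -> str:
    """Hypothetical mimic of _process_gemini_image's mime-type dispatch."""
    if ".mp4" in image_url and "gs://" in image_url:
        return "video/mp4"        # Case 1: Cloud Storage video
    elif ".pdf" in image_url and "gs://" in image_url:
        return "application/pdf"  # Case 2: Cloud Storage PDF
    elif "gs://" in image_url:
        return "image/png" if "png" in image_url else "image/jpeg"  # Case 3
    elif "https:/" in image_url:
        return "inline-image-from-url"  # Case 4: direct link
    elif "base64" in image_url:
        return "inline-base64-image"    # Case 5: data URL
    raise ValueError(f"unsupported url: {image_url}")

# videos and PDFs are no longer misclassified as images
assert classify_gemini_part("gs://bucket/clip.mp4") == "video/mp4"
assert classify_gemini_part("gs://bucket/doc.pdf") == "application/pdf"
assert classify_gemini_part("gs://bucket/photo.png") == "image/png"
```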


@@ -63,7 +63,8 @@ extra_proxy = [
    "azure-identity",
    "azure-keyvault-secrets",
    "google-cloud-kms",
    "resend",
    "pynacl"
]
[tool.poetry.scripts]
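
With `pynacl` added to the `extra_proxy` group, the proxy extras install as before; a sketch, assuming the extras group is exposed on PyPI under the same name:

```shell
pip install 'litellm[extra_proxy]'
```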