diff --git a/README.md b/README.md
index 8868dc8cc..fe7d56b6c 100644
--- a/README.md
+++ b/README.md
@@ -225,37 +225,37 @@ curl 'http://0.0.0.0:4000/key/generate' \
 ## Supported Providers ([Docs](https://docs.litellm.ai/docs/providers))
 
 | Provider | [Completion](https://docs.litellm.ai/docs/#basic-usage) | [Streaming](https://docs.litellm.ai/docs/completion/stream#streaming-responses) | [Async Completion](https://docs.litellm.ai/docs/completion/stream#async-completion) | [Async Streaming](https://docs.litellm.ai/docs/completion/stream#async-streaming) | [Async Embedding](https://docs.litellm.ai/docs/embedding/supported_embedding) | [Async Image Generation](https://docs.litellm.ai/docs/image_generation) |
-| ----------------------------------------------------------------------------------- | ------------------------------------------------------- | ------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | --------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------- |
-| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
-| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
-| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ |
-| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ |
-| [google - vertex_ai](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅
-| [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ |
-| [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | ✅ | |
-| [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ |
-| [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ |
-| [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ |
-| [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ |
-| [huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ |
-| [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ |
-| [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | ✅ |
-| [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | ✅ |
-| [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ |
-| [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ |
-| [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ |
-| [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ |
-| [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ |
-| [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ |
-| [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | ✅ |
-| [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ |
-| [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ |
-| [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ |
-| [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ |
-| [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ |
-| [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅
-| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ |
-| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ |
+|-------------------------------------------------------------------------------------|---------------------------------------------------------|---------------------------------------------------------------------------------|-------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------|-------------------------------------------------------------------------------|-------------------------------------------------------------------------|
+| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ | |
+| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ | |
+| [google - vertex_ai](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ | | |
+| [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | ✅ | ✅ | ✅ | | |
+| [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ | |
+| [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ | | |
+| [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ | |
+| [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ | | |
+| [huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ | |
+| [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ | | |
+| [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | ✅ | | |
+| [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | ✅ | | |
+| [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ | | |
+| [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ | | |
+| [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ | | |
+| [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ | | |
+| [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ | | |
+| [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ | | |
+| [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ | ✅ | |
+| [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ | | |
+| [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ | | |
+| [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ | | |
+| [Deepseek](https://docs.litellm.ai/docs/providers/deepseek) | ✅ | ✅ | ✅ | ✅ | | |
+| [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ | | |
+| [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ | |
+| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ | |
+| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ | |
 
 [**Read the Docs**](https://docs.litellm.ai/docs/)
diff --git a/docs/my-website/docs/observability/langfuse_integration.md b/docs/my-website/docs/observability/langfuse_integration.md
index 6dd5377ea..07970f599 100644
--- a/docs/my-website/docs/observability/langfuse_integration.md
+++ b/docs/my-website/docs/observability/langfuse_integration.md
@@ -144,6 +144,26 @@ print(response)
 ```
 
+You can also pass `metadata` as part of the request header with a `langfuse_*` prefix:
+
+```shell
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+    --header 'Content-Type: application/json' \
+    --header 'langfuse_trace_id: trace-id22' \
+    --header 'langfuse_trace_user_id: user-id2' \
+    --header 'langfuse_trace_metadata: {"key":"value"}' \
+    --data '{
+    "model": "gpt-3.5-turbo",
+    "messages": [
+        {
+        "role": "user",
+        "content": "what llm are you"
+        }
+    ]
+}'
+```
+
+
 ### Trace & Generation Parameters
 
 #### Trace Specific Parameters
diff --git a/docs/my-website/docs/providers/groq.md b/docs/my-website/docs/providers/groq.md
index da453c3ce..857aae5bd 100644
--- a/docs/my-website/docs/providers/groq.md
+++ b/docs/my-website/docs/providers/groq.md
@@ -46,13 +46,13 @@ for chunk in response:
 ## Supported Models - ALL Groq Models Supported!
 We support ALL Groq models, just set `groq/` as a prefix when sending completion requests
 
-| Model Name               | Function Call |
-|--------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| llama3-8b-8192     | `completion(model="groq/llama3-8b-8192", messages)` |
-| llama3-70b-8192     | `completion(model="groq/llama3-70b-8192", messages)` |
-| llama2-70b-4096     | `completion(model="groq/llama2-70b-4096", messages)` |
+| Model Name         | Function Call                                           |
+|--------------------|---------------------------------------------------------|
+| llama3-8b-8192     | `completion(model="groq/llama3-8b-8192", messages)`     |
+| llama3-70b-8192    | `completion(model="groq/llama3-70b-8192", messages)`    |
+| llama2-70b-4096    | `completion(model="groq/llama2-70b-4096", messages)`    |
 | mixtral-8x7b-32768 | `completion(model="groq/mixtral-8x7b-32768", messages)` |
-| gemma-7b-it        | `completion(model="groq/gemma-7b-it", messages)` |
+| gemma-7b-it        | `completion(model="groq/gemma-7b-it", messages)`        |
 
 ## Groq - Tool / Function Calling Example
diff --git a/docs/my-website/docs/providers/togetherai.md b/docs/my-website/docs/providers/togetherai.md
index d718619f0..1021f5ba8 100644
--- a/docs/my-website/docs/providers/togetherai.md
+++ b/docs/my-website/docs/providers/togetherai.md
@@ -26,52 +26,52 @@ Example TogetherAI Usage - Note: liteLLM supports all models deployed on Togethe
 
 ### Llama LLMs - Chat
-| Model Name | Function Call | Required OS Variables |
-|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
-| togethercomputer/llama-2-70b-chat | `completion('together_ai/togethercomputer/llama-2-70b-chat', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
+| Model Name                        | Function Call                                                            | Required OS Variables              |
+|-----------------------------------|---------------------------------------------------------------------------|------------------------------------|
+| togethercomputer/llama-2-70b-chat | `completion('together_ai/togethercomputer/llama-2-70b-chat', messages)`  | `os.environ['TOGETHERAI_API_KEY']` |
 
 ### Llama LLMs - Language / Instruct
-| Model Name | Function Call | Required OS Variables |
-|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
-| togethercomputer/llama-2-70b | `completion('together_ai/togethercomputer/llama-2-70b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
-| togethercomputer/LLaMA-2-7B-32K | `completion('together_ai/togethercomputer/LLaMA-2-7B-32K', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
-| togethercomputer/Llama-2-7B-32K-Instruct | `completion('together_ai/togethercomputer/Llama-2-7B-32K-Instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
-| togethercomputer/llama-2-7b | `completion('together_ai/togethercomputer/llama-2-7b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
+| Model Name                                | Function Call                                                                    | Required OS Variables              |
+|-------------------------------------------|-----------------------------------------------------------------------------------|------------------------------------|
+| togethercomputer/llama-2-70b              | `completion('together_ai/togethercomputer/llama-2-70b', messages)`               | `os.environ['TOGETHERAI_API_KEY']` |
+| togethercomputer/LLaMA-2-7B-32K           | `completion('together_ai/togethercomputer/LLaMA-2-7B-32K', messages)`            | `os.environ['TOGETHERAI_API_KEY']` |
+| togethercomputer/Llama-2-7B-32K-Instruct  | `completion('together_ai/togethercomputer/Llama-2-7B-32K-Instruct', messages)`   | `os.environ['TOGETHERAI_API_KEY']` |
+| togethercomputer/llama-2-7b               | `completion('together_ai/togethercomputer/llama-2-7b', messages)`                | `os.environ['TOGETHERAI_API_KEY']` |
 
 ### Falcon LLMs
-| Model Name | Function Call | Required OS Variables |
-|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
-| togethercomputer/falcon-40b-instruct | `completion('together_ai/togethercomputer/falcon-40b-instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
-| togethercomputer/falcon-7b-instruct | `completion('together_ai/togethercomputer/falcon-7b-instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
+| Model Name                            | Function Call                                                                | Required OS Variables              |
+|---------------------------------------|-------------------------------------------------------------------------------|------------------------------------|
+| togethercomputer/falcon-40b-instruct  | `completion('together_ai/togethercomputer/falcon-40b-instruct', messages)`   | `os.environ['TOGETHERAI_API_KEY']` |
+| togethercomputer/falcon-7b-instruct   | `completion('together_ai/togethercomputer/falcon-7b-instruct', messages)`    | `os.environ['TOGETHERAI_API_KEY']` |
 
 ### Alpaca LLMs
-| Model Name | Function Call | Required OS Variables |
-|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
-| togethercomputer/alpaca-7b | `completion('together_ai/togethercomputer/alpaca-7b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
+| Model Name                  | Function Call                                                      | Required OS Variables              |
+|-----------------------------|----------------------------------------------------------------------|------------------------------------|
+| togethercomputer/alpaca-7b  | `completion('together_ai/togethercomputer/alpaca-7b', messages)`   | `os.environ['TOGETHERAI_API_KEY']` |
 
 ### Other Chat LLMs
-| Model Name | Function Call | Required OS Variables |
-|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
-| HuggingFaceH4/starchat-alpha | `completion('together_ai/HuggingFaceH4/starchat-alpha', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
+| Model Name                    | Function Call                                                        | Required OS Variables              |
+|-------------------------------|------------------------------------------------------------------------|------------------------------------|
+| HuggingFaceH4/starchat-alpha  | `completion('together_ai/HuggingFaceH4/starchat-alpha', messages)`   | `os.environ['TOGETHERAI_API_KEY']` |
 
 ### Code LLMs
-| Model Name | Function Call | Required OS Variables |
-|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
-| togethercomputer/CodeLlama-34b | `completion('together_ai/togethercomputer/CodeLlama-34b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
-| togethercomputer/CodeLlama-34b-Instruct | `completion('together_ai/togethercomputer/CodeLlama-34b-Instruct', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
-| togethercomputer/CodeLlama-34b-Python | `completion('together_ai/togethercomputer/CodeLlama-34b-Python', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
-| defog/sqlcoder | `completion('together_ai/defog/sqlcoder', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
-| NumbersStation/nsql-llama-2-7B | `completion('together_ai/NumbersStation/nsql-llama-2-7B', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
-| WizardLM/WizardCoder-15B-V1.0 | `completion('together_ai/WizardLM/WizardCoder-15B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
-| WizardLM/WizardCoder-Python-34B-V1.0 | `completion('together_ai/WizardLM/WizardCoder-Python-34B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
+| Model Name                               | Function Call                                                                   | Required OS Variables              |
+|------------------------------------------|------------------------------------------------------------------------------------|------------------------------------|
+| togethercomputer/CodeLlama-34b           | `completion('together_ai/togethercomputer/CodeLlama-34b', messages)`            | `os.environ['TOGETHERAI_API_KEY']` |
+| togethercomputer/CodeLlama-34b-Instruct  | `completion('together_ai/togethercomputer/CodeLlama-34b-Instruct', messages)`   | `os.environ['TOGETHERAI_API_KEY']` |
+| togethercomputer/CodeLlama-34b-Python    | `completion('together_ai/togethercomputer/CodeLlama-34b-Python', messages)`     | `os.environ['TOGETHERAI_API_KEY']` |
+| defog/sqlcoder                           | `completion('together_ai/defog/sqlcoder', messages)`                            | `os.environ['TOGETHERAI_API_KEY']` |
+| NumbersStation/nsql-llama-2-7B           | `completion('together_ai/NumbersStation/nsql-llama-2-7B', messages)`            | `os.environ['TOGETHERAI_API_KEY']` |
+| WizardLM/WizardCoder-15B-V1.0            | `completion('together_ai/WizardLM/WizardCoder-15B-V1.0', messages)`             | `os.environ['TOGETHERAI_API_KEY']` |
+| WizardLM/WizardCoder-Python-34B-V1.0     | `completion('together_ai/WizardLM/WizardCoder-Python-34B-V1.0', messages)`      | `os.environ['TOGETHERAI_API_KEY']` |
 
 ### Language LLMs
-| Model Name | Function Call | Required OS Variables |
-|-----------------------------------|------------------------------------------------------------------------|---------------------------------|
-| NousResearch/Nous-Hermes-Llama2-13b | `completion('together_ai/NousResearch/Nous-Hermes-Llama2-13b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
-| Austism/chronos-hermes-13b | `completion('together_ai/Austism/chronos-hermes-13b', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
-| upstage/SOLAR-0-70b-16bit | `completion('together_ai/upstage/SOLAR-0-70b-16bit', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
-| WizardLM/WizardLM-70B-V1.0 | `completion('together_ai/WizardLM/WizardLM-70B-V1.0', messages)` | `os.environ['TOGETHERAI_API_KEY']` |
+| Model Name                           | Function Call                                                               | Required OS Variables              |
+|--------------------------------------|--------------------------------------------------------------------------------|------------------------------------|
+| NousResearch/Nous-Hermes-Llama2-13b  | `completion('together_ai/NousResearch/Nous-Hermes-Llama2-13b', messages)`   | `os.environ['TOGETHERAI_API_KEY']` |
+| Austism/chronos-hermes-13b           | `completion('together_ai/Austism/chronos-hermes-13b', messages)`            | `os.environ['TOGETHERAI_API_KEY']` |
+| upstage/SOLAR-0-70b-16bit            | `completion('together_ai/upstage/SOLAR-0-70b-16bit', messages)`             | `os.environ['TOGETHERAI_API_KEY']` |
+| WizardLM/WizardLM-70B-V1.0           | `completion('together_ai/WizardLM/WizardLM-70B-V1.0', messages)`            | `os.environ['TOGETHERAI_API_KEY']` |
 
 ## Prompt Templates
diff --git a/docs/my-website/docs/providers/vllm.md b/docs/my-website/docs/providers/vllm.md
index c22cd4fc2..61dd1fffd 100644
--- a/docs/my-website/docs/providers/vllm.md
+++ b/docs/my-website/docs/providers/vllm.md
@@ -155,14 +155,14 @@ def default_pt(messages):
 
 #### Models we already have Prompt Templates for
 
-| Model Name | Works for Models | Function Call |
-| -------- | -------- | -------- |
-| meta-llama/Llama-2-7b-chat | All meta-llama llama2 chat models| `completion(model='vllm/meta-llama/Llama-2-7b', messages=messages, api_base="your_api_endpoint")` |
-| tiiuae/falcon-7b-instruct | All falcon instruct models | `completion(model='vllm/tiiuae/falcon-7b-instruct', messages=messages, api_base="your_api_endpoint")` |
-| mosaicml/mpt-7b-chat | All mpt chat models | `completion(model='vllm/mosaicml/mpt-7b-chat', messages=messages, api_base="your_api_endpoint")` |
-| codellama/CodeLlama-34b-Instruct-hf | All codellama instruct models | `completion(model='vllm/codellama/CodeLlama-34b-Instruct-hf', messages=messages, api_base="your_api_endpoint")` |
-| WizardLM/WizardCoder-Python-34B-V1.0 | All wizardcoder models | `completion(model='vllm/WizardLM/WizardCoder-Python-34B-V1.0', messages=messages, api_base="your_api_endpoint")` |
-| Phind/Phind-CodeLlama-34B-v2 | All phind-codellama models | `completion(model='vllm/Phind/Phind-CodeLlama-34B-v2', messages=messages, api_base="your_api_endpoint")` |
+| Model Name                           | Works for Models                  | Function Call                                                                                                     |
+|--------------------------------------|-----------------------------------|--------------------------------------------------------------------------------------------------------------------|
+| meta-llama/Llama-2-7b-chat           | All meta-llama llama2 chat models | `completion(model='vllm/meta-llama/Llama-2-7b', messages=messages, api_base="your_api_endpoint")`                 |
+| tiiuae/falcon-7b-instruct            | All falcon instruct models        | `completion(model='vllm/tiiuae/falcon-7b-instruct', messages=messages, api_base="your_api_endpoint")`             |
+| mosaicml/mpt-7b-chat                 | All mpt chat models               | `completion(model='vllm/mosaicml/mpt-7b-chat', messages=messages, api_base="your_api_endpoint")`                  |
+| codellama/CodeLlama-34b-Instruct-hf  | All codellama instruct models     | `completion(model='vllm/codellama/CodeLlama-34b-Instruct-hf', messages=messages, api_base="your_api_endpoint")`   |
+| WizardLM/WizardCoder-Python-34B-V1.0 | All wizardcoder models            | `completion(model='vllm/WizardLM/WizardCoder-Python-34B-V1.0', messages=messages, api_base="your_api_endpoint")`  |
+| Phind/Phind-CodeLlama-34B-v2         | All phind-codellama models        | `completion(model='vllm/Phind/Phind-CodeLlama-34B-v2', messages=messages, api_base="your_api_endpoint")`          |
 
 #### Custom prompt templates
diff --git a/docs/my-website/docs/providers/watsonx.md b/docs/my-website/docs/providers/watsonx.md
index d8c5740a8..7a42a54ed 100644
--- a/docs/my-website/docs/providers/watsonx.md
+++ b/docs/my-website/docs/providers/watsonx.md
@@ -251,23 +251,23 @@ response = completion(
 Here are some examples of models available in IBM watsonx.ai that you can use with LiteLLM:
 
-| Mode Name | Command |
-| ---------- | --------- |
-| Flan T5 XXL | `completion(model=watsonx/google/flan-t5-xxl, messages=messages)` |
-| Flan Ul2 | `completion(model=watsonx/google/flan-ul2, messages=messages)` |
-| Mt0 XXL | `completion(model=watsonx/bigscience/mt0-xxl, messages=messages)` |
-| Gpt Neox | `completion(model=watsonx/eleutherai/gpt-neox-20b, messages=messages)` |
-| Mpt 7B Instruct2 | `completion(model=watsonx/ibm/mpt-7b-instruct2, messages=messages)` |
-| Starcoder | `completion(model=watsonx/bigcode/starcoder, messages=messages)` |
-| Llama 2 70B Chat | `completion(model=watsonx/meta-llama/llama-2-70b-chat, messages=messages)` |
-| Llama 2 13B Chat | `completion(model=watsonx/meta-llama/llama-2-13b-chat, messages=messages)` |
-| Granite 13B Instruct | `completion(model=watsonx/ibm/granite-13b-instruct-v1, messages=messages)` |
-| Granite 13B Chat | `completion(model=watsonx/ibm/granite-13b-chat-v1, messages=messages)` |
-| Flan T5 XL | `completion(model=watsonx/google/flan-t5-xl, messages=messages)` |
-| Granite 13B Chat V2 | `completion(model=watsonx/ibm/granite-13b-chat-v2, messages=messages)` |
-| Granite 13B Instruct V2 | `completion(model=watsonx/ibm/granite-13b-instruct-v2, messages=messages)` |
-| Elyza Japanese Llama 2 7B Instruct | `completion(model=watsonx/elyza/elyza-japanese-llama-2-7b-instruct, messages=messages)` |
-| Mixtral 8X7B Instruct V01 Q | `completion(model=watsonx/ibm-mistralai/mixtral-8x7b-instruct-v01-q, messages=messages)` |
+| Model Name                          | Command                                                                                        |
+|-------------------------------------|--------------------------------------------------------------------------------------------------|
+| Flan T5 XXL                         | `completion(model="watsonx/google/flan-t5-xxl", messages=messages)`                            |
+| Flan Ul2                            | `completion(model="watsonx/google/flan-ul2", messages=messages)`                               |
+| Mt0 XXL                             | `completion(model="watsonx/bigscience/mt0-xxl", messages=messages)`                            |
+| Gpt Neox                            | `completion(model="watsonx/eleutherai/gpt-neox-20b", messages=messages)`                       |
+| Mpt 7B Instruct2                    | `completion(model="watsonx/ibm/mpt-7b-instruct2", messages=messages)`                          |
+| Starcoder                           | `completion(model="watsonx/bigcode/starcoder", messages=messages)`                             |
+| Llama 2 70B Chat                    | `completion(model="watsonx/meta-llama/llama-2-70b-chat", messages=messages)`                   |
+| Llama 2 13B Chat                    | `completion(model="watsonx/meta-llama/llama-2-13b-chat", messages=messages)`                   |
+| Granite 13B Instruct                | `completion(model="watsonx/ibm/granite-13b-instruct-v1", messages=messages)`                   |
+| Granite 13B Chat                    | `completion(model="watsonx/ibm/granite-13b-chat-v1", messages=messages)`                       |
+| Flan T5 XL                          | `completion(model="watsonx/google/flan-t5-xl", messages=messages)`                             |
+| Granite 13B Chat V2                 | `completion(model="watsonx/ibm/granite-13b-chat-v2", messages=messages)`                       |
+| Granite 13B Instruct V2             | `completion(model="watsonx/ibm/granite-13b-instruct-v2", messages=messages)`                   |
+| Elyza Japanese Llama 2 7B Instruct  | `completion(model="watsonx/elyza/elyza-japanese-llama-2-7b-instruct", messages=messages)`      |
+| Mixtral 8X7B Instruct V01 Q         | `completion(model="watsonx/ibm-mistralai/mixtral-8x7b-instruct-v01-q", messages=messages)`     |
 
 For a list of all available models in watsonx.ai, see [here](https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/fm-models.html?context=wx&locale=en&audience=wdp).
@@ -275,10 +275,10 @@ For a list of all available models in watsonx.ai, see [here](https://dataplatfor
 
 ## Supported IBM watsonx.ai Embedding Models
 
-| Model Name | Function Call |
-|----------------------|---------------------------------------------|
-| Slate 30m | `embedding(model="watsonx/ibm/slate-30m-english-rtrvr", input=input)` |
-| Slate 125m | `embedding(model="watsonx/ibm/slate-125m-english-rtrvr", input=input)` |
+| Model Name | Function Call                                                            |
+|------------|----------------------------------------------------------------------------|
+| Slate 30m  | `embedding(model="watsonx/ibm/slate-30m-english-rtrvr", input=input)`    |
+| Slate 125m | `embedding(model="watsonx/ibm/slate-125m-english-rtrvr", input=input)`   |
 
 For a list of all available embedding models in watsonx.ai, see [here](https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/fm-models-embed.html?context=wx).
\ No newline at end of file
diff --git a/docs/my-website/docs/providers/xinference.md b/docs/my-website/docs/providers/xinference.md
index 3c927dcb4..3686c0209 100644
--- a/docs/my-website/docs/providers/xinference.md
+++ b/docs/my-website/docs/providers/xinference.md
@@ -37,26 +37,26 @@ print(response)
 ## Supported Models
 All models listed here https://inference.readthedocs.io/en/latest/models/builtin/embedding/index.html are supported
 
-| Model Name | Function Call |
-|------------------------------|--------------------------------------------------------|
-| bge-base-en | `embedding(model="xinference/bge-base-en", input)` |
-| bge-base-en-v1.5 | `embedding(model="xinference/bge-base-en-v1.5", input)` |
-| bge-base-zh | `embedding(model="xinference/bge-base-zh", input)` |
-| bge-base-zh-v1.5 | `embedding(model="xinference/bge-base-zh-v1.5", input)` |
-| bge-large-en | `embedding(model="xinference/bge-large-en", input)` |
-| bge-large-en-v1.5 | `embedding(model="xinference/bge-large-en-v1.5", input)` |
-| bge-large-zh | `embedding(model="xinference/bge-large-zh", input)` |
-| bge-large-zh-noinstruct | `embedding(model="xinference/bge-large-zh-noinstruct", input)` |
-| bge-large-zh-v1.5 | `embedding(model="xinference/bge-large-zh-v1.5", input)` |
-| bge-small-en-v1.5 | `embedding(model="xinference/bge-small-en-v1.5", input)` |
-| bge-small-zh | `embedding(model="xinference/bge-small-zh", input)` |
-| bge-small-zh-v1.5 | `embedding(model="xinference/bge-small-zh-v1.5", input)` |
-| e5-large-v2 | `embedding(model="xinference/e5-large-v2", input)` |
-| gte-base | `embedding(model="xinference/gte-base", input)` |
-| gte-large | `embedding(model="xinference/gte-large", input)` |
-| jina-embeddings-v2-base-en | `embedding(model="xinference/jina-embeddings-v2-base-en", input)` |
-| jina-embeddings-v2-small-en | `embedding(model="xinference/jina-embeddings-v2-small-en", input)` |
-| multilingual-e5-large | `embedding(model="xinference/multilingual-e5-large", input)` |
+| Model Name                  | Function Call                                                        |
+|-----------------------------|----------------------------------------------------------------------|
+| bge-base-en                 | `embedding(model="xinference/bge-base-en", input)`                  |
+| bge-base-en-v1.5            | `embedding(model="xinference/bge-base-en-v1.5", input)`             |
+| bge-base-zh                 | `embedding(model="xinference/bge-base-zh", input)`                  |
+| bge-base-zh-v1.5            | `embedding(model="xinference/bge-base-zh-v1.5", input)`             |
+| bge-large-en                | `embedding(model="xinference/bge-large-en", input)`                 |
+| bge-large-en-v1.5           | `embedding(model="xinference/bge-large-en-v1.5", input)`            |
+| bge-large-zh                | `embedding(model="xinference/bge-large-zh", input)`                 |
+| bge-large-zh-noinstruct     | `embedding(model="xinference/bge-large-zh-noinstruct", input)`      |
+| bge-large-zh-v1.5           | `embedding(model="xinference/bge-large-zh-v1.5", input)`            |
+| bge-small-en-v1.5           | `embedding(model="xinference/bge-small-en-v1.5", input)`            |
+| bge-small-zh                | `embedding(model="xinference/bge-small-zh", input)`                 |
+| bge-small-zh-v1.5           | `embedding(model="xinference/bge-small-zh-v1.5", input)`            |
+| e5-large-v2                 | `embedding(model="xinference/e5-large-v2", input)`                  |
+| gte-base                    | `embedding(model="xinference/gte-base", input)`                     |
+| gte-large                   | `embedding(model="xinference/gte-large", input)`                    |
+| jina-embeddings-v2-base-en  | `embedding(model="xinference/jina-embeddings-v2-base-en", input)`   |
+| jina-embeddings-v2-small-en | `embedding(model="xinference/jina-embeddings-v2-small-en", input)`  |
+| multilingual-e5-large       | `embedding(model="xinference/multilingual-e5-large", input)`        |
diff --git a/docs/my-website/docs/proxy/deploy.md b/docs/my-website/docs/proxy/deploy.md
index 6fb8c5bfe..b756f56e2 100644
--- a/docs/my-website/docs/proxy/deploy.md
+++ b/docs/my-website/docs/proxy/deploy.md
@@ -260,7 +260,7 @@ Requirements:
 
-We maintain a [seperate Dockerfile](https://github.com/BerriAI/litellm/pkgs/container/litellm-database) for reducing build time when running LiteLLM proxy with a connected Postgres Database
+We maintain a [separate Dockerfile](https://github.com/BerriAI/litellm/pkgs/container/litellm-database) to reduce build time when running the LiteLLM proxy with a connected Postgres database
 
 ```shell
 docker pull ghcr.io/berriai/litellm-database:main-latest
 ```
diff --git a/docs/my-website/docs/proxy/enterprise.md b/docs/my-website/docs/proxy/enterprise.md
index e52a19162..2b984b3e7 100644
--- a/docs/my-website/docs/proxy/enterprise.md
+++ b/docs/my-website/docs/proxy/enterprise.md
@@ -459,7 +459,7 @@ Step 1 Set a `LAKERA_API_KEY` in your env
 LAKERA_API_KEY="7a91a1a6059da*******"
 ```
 
-Step 2. Add `lakera_prompt_injection` to your calbacks
+Step 2. Add `lakera_prompt_injection` to your callbacks
 
 ```yaml
 litellm_settings:
diff --git a/docs/my-website/docs/tutorials/finetuned_chat_gpt.md b/docs/my-website/docs/tutorials/finetuned_chat_gpt.md
index 641c45b5f..5dde3b3ff 100644
--- a/docs/my-website/docs/tutorials/finetuned_chat_gpt.md
+++ b/docs/my-website/docs/tutorials/finetuned_chat_gpt.md
@@ -1,8 +1,8 @@
 # Using Fine-Tuned gpt-3.5-turbo
 LiteLLM allows you to call `completion` with your fine-tuned gpt-3.5-turbo models
-If you're trying to create your custom finetuned gpt-3.5-turbo model following along on this tutorial: https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset
+If you're trying to create a custom fine-tuned gpt-3.5-turbo model, follow along with this tutorial: https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset
 
-Once you've created your fine tuned model, you can call it with `litellm.completion()`
+Once you've created your fine-tuned model, you can call it with `litellm.completion()`
 
 ## Usage
 ```python
diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py
index 4d580f666..4218e2dc5 100644
--- a/litellm/integrations/langfuse.py
+++ b/litellm/integrations/langfuse.py
@@ -69,6 +69,28 @@ class LangFuseLogger:
         else:
             self.upstream_langfuse = None
 
+    @staticmethod
+    def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict:
+        """
+        Adds metadata from proxy request headers to Langfuse logging if keys start with "langfuse_",
+        overwriting any matching keys already present in litellm_params.metadata.
+
+        For example if you want to append your trace to an existing `trace_id` via header, send
+        `headers: { ..., langfuse_existing_trace_id: your-existing-trace-id }` via proxy request.
+        """
+        proxy_headers = litellm_params.get("proxy_server_request", {}).get("headers", {})
+
+        for metadata_param_key in proxy_headers:
+            if metadata_param_key.startswith("langfuse_"):
+                trace_param_key = metadata_param_key.replace("langfuse_", "", 1)
+                if trace_param_key in metadata:
+                    verbose_logger.warning(f"Overwriting Langfuse `{trace_param_key}` from request header")
+                else:
+                    verbose_logger.debug(f"Found Langfuse `{trace_param_key}` in request header")
+                metadata[trace_param_key] = proxy_headers.get(metadata_param_key)
+
+        return metadata
+
     # def log_error(kwargs, response_obj, start_time, end_time):
     #     generation = trace.generation(
     #         level ="ERROR" # can be any of DEBUG, DEFAULT, WARNING or ERROR
@@ -97,6 +119,7 @@ class LangFuseLogger:
         metadata = (
             litellm_params.get("metadata", {}) or {}
         )  # if litellm_params['metadata'] == None
+        metadata = self.add_metadata_from_header(litellm_params, metadata)
         optional_params = copy.deepcopy(kwargs.get("optional_params", {}))
 
         prompt = {"messages": kwargs.get("messages")}
diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index 41ecb486c..10f3f16ed 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -833,7 +833,7 @@ def anthropic_messages_pt_xml(messages: list):
             )  # either string or none
             if messages[msg_i].get(
                 "tool_calls", []
-            ):  # support assistant tool invoke convertion
+            ):  # support assistant tool invoke conversion
                 assistant_text += convert_to_anthropic_tool_invoke_xml(  # type: ignore
                     messages[msg_i]["tool_calls"]
                 )
@@ -1224,7 +1224,7 @@ def anthropic_messages_pt(messages: list):
 
         if messages[msg_i].get(
             "tool_calls", []
-        ):  # support assistant tool invoke convertion
+        ):  # support assistant tool invoke conversion
             assistant_content.extend(
                 convert_to_anthropic_tool_invoke(messages[msg_i]["tool_calls"])
             )
diff --git a/litellm/llms/vertex_ai.py b/litellm/llms/vertex_ai.py
index 5171b1efc..dc79e7e4e 100644
--- a/litellm/llms/vertex_ai.py
+++ b/litellm/llms/vertex_ai.py
@@ -297,24 +297,29 @@ def _convert_gemini_role(role: str) -> Literal["user", "model"]:
 
 def _process_gemini_image(image_url: str) -> PartType:
     try:
-        if "gs://" in image_url:
-            # Case 1: Images with Cloud Storage URIs
+        if ".mp4" in image_url and "gs://" in image_url:
+            # Case 1: Videos with Cloud Storage URIs
+            part_mime = "video/mp4"
+            _file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
+            return PartType(file_data=_file_data)
+        elif ".pdf" in image_url and "gs://" in image_url:
+            # Case 2: PDFs with Cloud Storage URIs
+            part_mime = "application/pdf"
+            _file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
+            return PartType(file_data=_file_data)
+        elif "gs://" in image_url:
+            # Case 3: Images with Cloud Storage URIs
             # The supported MIME types for images include image/png and image/jpeg.
             part_mime = "image/png" if "png" in image_url else "image/jpeg"
             _file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
             return PartType(file_data=_file_data)
         elif "https:/" in image_url:
-            # Case 2: Images with direct links
+            # Case 4: Images with direct links
             image = _load_image_from_url(image_url)
             _blob = BlobType(data=image.data, mime_type=image._mime_type)
             return PartType(inline_data=_blob)
-        elif ".mp4" in image_url and "gs://" in image_url:
-            # Case 3: Videos with Cloud Storage URIs
-            part_mime = "video/mp4"
-            _file_data = FileDataType(mime_type=part_mime, file_uri=image_url)
-            return PartType(file_data=_file_data)
         elif "base64" in image_url:
-            # Case 4: Images with base64 encoding
+            # Case 5: Images with base64 encoding
             import base64, re
 
             # base 64 is passed as data:image/jpeg;base64,
@@ -390,7 +395,7 @@ def _gemini_convert_messages_with_history(messages: list) -> List[ContentType]:
             assistant_content.extend(_parts)
         elif messages[msg_i].get(
             "tool_calls", []
-        ):  # support assistant tool invoke convertion
+        ):  # support assistant tool invoke conversion
             assistant_content.extend(
                 convert_to_gemini_tool_call_invoke(messages[msg_i]["tool_calls"])
             )
diff --git a/pyproject.toml b/pyproject.toml
index f17bb940c..cd57092c4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -63,7 +63,8 @@ extra_proxy = [
     "azure-identity",
     "azure-keyvault-secrets",
     "google-cloud-kms",
-    "resend"
+    "resend",
+    "pynacl"
 ]
 
 [tool.poetry.scripts]
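
As a quick sanity check on the Langfuse change above, the header-to-metadata mapping is easy to exercise in isolation. The following is a minimal, self-contained sketch that mirrors the logic of `add_metadata_from_header` from `litellm/integrations/langfuse.py`, without the LiteLLM imports or logging; the sample header values are illustrative only, not taken from the PR.

```python
# Standalone sketch of the `langfuse_*` header-to-metadata mapping added in
# litellm/integrations/langfuse.py (illustrative; not the shipped module).


def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict:
    """Copy every proxy request header that starts with 'langfuse_' into the
    Langfuse metadata dict, stripping the prefix. Existing keys are
    overwritten (the shipped version also logs a warning in that case)."""
    proxy_headers = litellm_params.get("proxy_server_request", {}).get("headers", {})
    for header_key, header_value in proxy_headers.items():
        if header_key.startswith("langfuse_"):
            metadata[header_key.replace("langfuse_", "", 1)] = header_value
    return metadata


if __name__ == "__main__":
    litellm_params = {
        "proxy_server_request": {
            "headers": {
                "content-type": "application/json",    # ignored: no langfuse_ prefix
                "langfuse_trace_id": "trace-id22",     # becomes metadata["trace_id"]
                "langfuse_trace_user_id": "user-id2",  # becomes metadata["trace_user_id"]
            }
        }
    }
    print(add_metadata_from_header(litellm_params, {"trace_id": "old"}))
    # {'trace_id': 'trace-id22', 'trace_user_id': 'user-id2'}
```

Headers without the `langfuse_` prefix are ignored, so ordinary headers such as `Content-Type` never leak into the trace; only the `langfuse_`-prefixed keys sent by the caller end up in the Langfuse trace metadata.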