Merge branch 'main' into litellm_edit_teams

Author: Ishaan Jaff, 2024-03-30 12:15:53 -07:00 (committed by GitHub)
Commit: 58cc11a312
52 changed files with 1343 additions and 608 deletions


@@ -34,6 +34,7 @@ jobs:
pip install "boto3>=1.28.57"
pip install "aioboto3>=12.3.0"
pip install langchain
+pip install lunary==0.2.5
pip install "langfuse>=2.0.0"
pip install numpydoc
pip install traceloop-sdk==0.0.69


@@ -25,6 +25,7 @@
</h4>
LiteLLM manages:
- Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints
- [Consistent output](https://docs.litellm.ai/docs/completion/output), text responses will always be available at `['choices'][0]['message']['content']`
- Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)
@@ -38,15 +39,14 @@ LiteLLM manages:
Support for more providers. Missing a provider or LLM Platform, raise a [feature request](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+).
# Usage ([**Docs**](https://docs.litellm.ai/docs/))
> [!IMPORTANT]
> LiteLLM v1.0.0 now requires `openai>=1.0.0`. Migration guide [here](https://docs.litellm.ai/docs/migration)
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_Getting_Started.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>
```shell
pip install litellm
```
@@ -88,8 +88,10 @@ print(response)
```
## Streaming ([Docs](https://docs.litellm.ai/docs/completion/stream))
liteLLM supports streaming the model response back; pass `stream=True` to get a streaming iterator in the response.
Streaming is supported for all models (Bedrock, Huggingface, TogetherAI, Azure, OpenAI, etc.)
```python
from litellm import completion
response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
@@ -103,20 +105,22 @@ for part in response:
```
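The loop body falls outside the hunk shown above; as a rough sketch of the full streaming pattern these docs describe (chunks follow the OpenAI delta format, so text arrives under `choices[0].delta.content`; the `messages` value is a placeholder):

```python
from litellm import completion

messages = [{"role": "user", "content": "Hey, how's it going?"}]

# stream=True returns an iterator of chunks instead of a single response
response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)

for part in response:
    # each chunk carries an incremental delta; content can be None on the final chunk
    print(part.choices[0].delta.content or "", end="")
```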
## Logging Observability ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
-LiteLLM exposes pre defined callbacks to send data to Langfuse, DynamoDB, s3 Buckets, LLMonitor, Helicone, Promptlayer, Traceloop, Athina, Slack
+LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, DynamoDB, s3 Buckets, Helicone, Promptlayer, Traceloop, Athina, Slack
```python
from litellm import completion
## set env variables for logging tools
+os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
-os.environ["LLMONITOR_APP_ID"] = "your-llmonitor-app-id"
os.environ["ATHINA_API_KEY"] = "your-athina-api-key"
os.environ["OPENAI_API_KEY"]
# set callbacks
-litellm.success_callback = ["langfuse", "llmonitor", "athina"] # log input/output to langfuse, llmonitor, supabase, athina etc
+litellm.success_callback = ["lunary", "langfuse", "athina"] # log input/output to lunary, langfuse, supabase, athina etc
#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
@@ -127,6 +131,7 @@ response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content
Set Budgets & Rate limits across multiple projects
The proxy provides:
1. [Hooks for auth](https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth)
2. [Hooks for logging](https://docs.litellm.ai/docs/proxy/logging#step-1---create-your-custom-litellm-callback-class)
3. [Cost tracking](https://docs.litellm.ai/docs/proxy/virtual_keys#tracking-spend)
@@ -141,6 +146,7 @@ pip install 'litellm[proxy]'
```
### Step 1: Start litellm proxy
```shell
$ litellm --model huggingface/bigcode/starcoder
@@ -148,6 +154,7 @@ $ litellm --model huggingface/bigcode/starcoder
```
### Step 2: Make ChatCompletions Request to Proxy
```python
import openai # openai v1.0.0+
client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:4000") # set proxy to base_url
@@ -163,6 +170,7 @@ print(response)
```
## Proxy Key Management ([Docs](https://docs.litellm.ai/docs/proxy/virtual_keys))
UI on `/ui` on your proxy server
![ui_3](https://github.com/BerriAI/litellm/assets/29436595/47c97d5e-b9be-4839-b28c-43d7f4f10033)
@@ -170,6 +178,7 @@ Set budgets and rate limits across multiple projects
`POST /key/generate`
### Request
```shell
curl 'http://0.0.0.0:4000/key/generate' \
--header 'Authorization: Bearer sk-1234' \
@@ -178,6 +187,7 @@ curl 'http://0.0.0.0:4000/key/generate' \
```
### Expected Response
```shell
{
  "key": "sk-kdEXbIqZRwEeEiHwdg7sFA", # Bearer token
@@ -186,56 +196,60 @@ curl 'http://0.0.0.0:4000/key/generate' \
```
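For illustration, the generated key can then be used in place of the master key when calling the proxy with the OpenAI SDK (a sketch; the key and `base_url` values are placeholders copied from the examples above):

```python
import openai  # openai v1.0.0+

# use the key returned by /key/generate so spend is tracked against it
client = openai.OpenAI(api_key="sk-kdEXbIqZRwEeEiHwdg7sFA", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "this is a test request, write a short poem"}],
)
print(response)
```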
## Supported Providers ([Docs](https://docs.litellm.ai/docs/providers))
| Provider | [Completion](https://docs.litellm.ai/docs/#basic-usage) | [Streaming](https://docs.litellm.ai/docs/completion/stream#streaming-responses) | [Async Completion](https://docs.litellm.ai/docs/completion/stream#async-completion) | [Async Streaming](https://docs.litellm.ai/docs/completion/stream#async-streaming) | [Async Embedding](https://docs.litellm.ai/docs/embedding/supported_embedding) | [Async Image Generation](https://docs.litellm.ai/docs/image_generation) |
| ----------------------------------------------------------------------------------- | ------------------------------------------------------- | ------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | --------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------- |
| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [google - vertex_ai [Gemini]](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ |
| [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ |
| [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | | ✅ | | |
| [mistral ai api](https://docs.litellm.ai/docs/providers/mistral) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [cloudflare AI Workers](https://docs.litellm.ai/docs/providers/cloudflare_workers) | ✅ | ✅ | ✅ | ✅ |
| [cohere](https://docs.litellm.ai/docs/providers/cohere) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [anthropic](https://docs.litellm.ai/docs/providers/anthropic) | ✅ | ✅ | ✅ | ✅ |
| [huggingface](https://docs.litellm.ai/docs/providers/huggingface) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [replicate](https://docs.litellm.ai/docs/providers/replicate) | ✅ | ✅ | ✅ | ✅ |
| [together_ai](https://docs.litellm.ai/docs/providers/togetherai) | ✅ | ✅ | ✅ | ✅ |
| [openrouter](https://docs.litellm.ai/docs/providers/openrouter) | ✅ | ✅ | ✅ | ✅ |
| [ai21](https://docs.litellm.ai/docs/providers/ai21) | ✅ | ✅ | ✅ | ✅ |
| [baseten](https://docs.litellm.ai/docs/providers/baseten) | ✅ | ✅ | ✅ | ✅ |
| [vllm](https://docs.litellm.ai/docs/providers/vllm) | ✅ | ✅ | ✅ | ✅ |
| [nlp_cloud](https://docs.litellm.ai/docs/providers/nlp_cloud) | ✅ | ✅ | ✅ | ✅ |
| [aleph alpha](https://docs.litellm.ai/docs/providers/aleph_alpha) | ✅ | ✅ | ✅ | ✅ |
| [petals](https://docs.litellm.ai/docs/providers/petals) | ✅ | ✅ | ✅ | ✅ |
| [ollama](https://docs.litellm.ai/docs/providers/ollama) | ✅ | ✅ | ✅ | ✅ |
| [deepinfra](https://docs.litellm.ai/docs/providers/deepinfra) | ✅ | ✅ | ✅ | ✅ |
| [perplexity-ai](https://docs.litellm.ai/docs/providers/perplexity) | ✅ | ✅ | ✅ | ✅ |
| [Groq AI](https://docs.litellm.ai/docs/providers/groq) | ✅ | ✅ | ✅ | ✅ |
| [anyscale](https://docs.litellm.ai/docs/providers/anyscale) | ✅ | ✅ | ✅ | ✅ |
| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ |
| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ |
[**Read the Docs**](https://docs.litellm.ai/docs/)
## Contributing
To contribute: Clone the repo locally -> Make a change -> Submit a PR with the change.
Here's how to modify the repo locally:
Step 1: Clone the repo
```
git clone https://github.com/BerriAI/litellm.git
```
Step 2: Navigate into the project, and install dependencies:
```
cd litellm
poetry install
```
Step 3: Test your change:
```
cd litellm/tests # pwd: Documents/litellm/litellm/tests
poetry run flake8
@@ -243,6 +257,7 @@ poetry run pytest .
```
Step 4: Submit a PR with your changes! 🚀
- push your fork to your GitHub repo
- submit a PR from there
@@ -260,12 +275,14 @@ This covers:
- ✅ **Secure access with Single Sign-On**
# Support / talk with founders
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
# Why did we build this
- **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI and Cohere.
# Contributors
@@ -282,4 +299,3 @@ This covers:
<a href="https://github.com/BerriAI/litellm/graphs/contributors">
  <img src="https://contrib.rocks/image?repo=BerriAI/litellm" />
</a>


@@ -0,0 +1,348 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "4FbDOmcj2VkM"
},
"source": [
"## Use LiteLLM with Langfuse\n",
"https://docs.litellm.ai/docs/observability/langfuse_integration"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "21W8Woog26Ns"
},
"source": [
"## Install Dependencies"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "xrjKLBxhxu2L"
},
"outputs": [],
"source": [
"%pip install litellm lunary"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "jHEu-TjZ29PJ"
},
"source": [
"## Set Env Variables"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"id": "QWd9rTysxsWO"
},
"outputs": [],
"source": [
"import litellm\n",
"from litellm import completion\n",
"import os\n",
"\n",
"# from https://app.lunary.ai/\n",
"os.environ[\"LUNARY_PUBLIC_KEY\"] = \"\"\n",
"\n",
"\n",
"# LLM provider keys\n",
"# You can use any of the litellm supported providers: https://docs.litellm.ai/docs/providers\n",
"os.environ['OPENAI_API_KEY'] = \"\"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "NodQl0hp3Lma"
},
"source": [
"## Set Lunary as a callback for sending data\n",
"## OpenAI completion call"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "vNAuwJY1yp_F",
"outputId": "c3a71e26-13f5-4379-fac9-409290ba79bb"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Choices(finish_reason='stop', index=0, message=Message(content='Hello! How can I assist you today?', role='assistant'))]ModelResponse(id='chatcmpl-8xIWykI0GiJSmYtXYuB8Z363kpIBm', choices=[Choices(finish_reason='stop', index=0, message=Message(content='Hello! How can I assist you today?', role='assistant'))], created=1709143276, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint='fp_86156a94a0', usage=Usage(completion_tokens=9, prompt_tokens=15, total_tokens=24))\n",
"\n",
"[Lunary] Add event: {\n",
" \"event\": \"start\",\n",
" \"type\": \"llm\",\n",
" \"name\": \"gpt-3.5-turbo\",\n",
" \"runId\": \"a363776a-bd07-4474-bce2-193067f01b2e\",\n",
" \"timestamp\": \"2024-02-28T18:01:15.188153+00:00\",\n",
" \"input\": {\n",
" \"role\": \"user\",\n",
" \"content\": \"Hi \\ud83d\\udc4b - i'm openai\"\n",
" },\n",
" \"extra\": {},\n",
" \"runtime\": \"litellm\",\n",
" \"metadata\": {}\n",
"}\n",
"\n",
"\n",
"[Lunary] Add event: {\n",
" \"event\": \"end\",\n",
" \"type\": \"llm\",\n",
" \"runId\": \"a363776a-bd07-4474-bce2-193067f01b2e\",\n",
" \"timestamp\": \"2024-02-28T18:01:16.846581+00:00\",\n",
" \"output\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Hello! How can I assist you today?\"\n",
" },\n",
" \"runtime\": \"litellm\",\n",
" \"tokensUsage\": {\n",
" \"completion\": 9,\n",
" \"prompt\": 15\n",
" }\n",
"}\n",
"\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"--- Logging error ---\n",
"Traceback (most recent call last):\n",
" File \"/Users/vince/Library/Caches/pypoetry/virtualenvs/litellm-7WKnDWGw-py3.12/lib/python3.12/site-packages/urllib3/connectionpool.py\", line 537, in _make_request\n",
" response = conn.getresponse()\n",
" ^^^^^^^^^^^^^^^^^^\n",
" File \"/Users/vince/Library/Caches/pypoetry/virtualenvs/litellm-7WKnDWGw-py3.12/lib/python3.12/site-packages/urllib3/connection.py\", line 466, in getresponse\n",
" httplib_response = super().getresponse()\n",
" ^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12/http/client.py\", line 1423, in getresponse\n",
" response.begin()\n",
" File \"/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12/http/client.py\", line 331, in begin\n",
" version, status, reason = self._read_status()\n",
" ^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12/http/client.py\", line 292, in _read_status\n",
" line = str(self.fp.readline(_MAXLINE + 1), \"iso-8859-1\")\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12/socket.py\", line 707, in readinto\n",
" return self._sock.recv_into(b)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^\n",
"TimeoutError: timed out\n",
"\n",
"The above exception was the direct cause of the following exception:\n",
"\n",
"Traceback (most recent call last):\n",
" File \"/Users/vince/Library/Caches/pypoetry/virtualenvs/litellm-7WKnDWGw-py3.12/lib/python3.12/site-packages/requests/adapters.py\", line 486, in send\n",
" resp = conn.urlopen(\n",
" ^^^^^^^^^^^^^\n",
" File \"/Users/vince/Library/Caches/pypoetry/virtualenvs/litellm-7WKnDWGw-py3.12/lib/python3.12/site-packages/urllib3/connectionpool.py\", line 847, in urlopen\n",
" retries = retries.increment(\n",
" ^^^^^^^^^^^^^^^^^^\n",
" File \"/Users/vince/Library/Caches/pypoetry/virtualenvs/litellm-7WKnDWGw-py3.12/lib/python3.12/site-packages/urllib3/util/retry.py\", line 470, in increment\n",
" raise reraise(type(error), error, _stacktrace)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/Users/vince/Library/Caches/pypoetry/virtualenvs/litellm-7WKnDWGw-py3.12/lib/python3.12/site-packages/urllib3/util/util.py\", line 39, in reraise\n",
" raise value\n",
" File \"/Users/vince/Library/Caches/pypoetry/virtualenvs/litellm-7WKnDWGw-py3.12/lib/python3.12/site-packages/urllib3/connectionpool.py\", line 793, in urlopen\n",
" response = self._make_request(\n",
" ^^^^^^^^^^^^^^^^^^^\n",
" File \"/Users/vince/Library/Caches/pypoetry/virtualenvs/litellm-7WKnDWGw-py3.12/lib/python3.12/site-packages/urllib3/connectionpool.py\", line 539, in _make_request\n",
" self._raise_timeout(err=e, url=url, timeout_value=read_timeout)\n",
" File \"/Users/vince/Library/Caches/pypoetry/virtualenvs/litellm-7WKnDWGw-py3.12/lib/python3.12/site-packages/urllib3/connectionpool.py\", line 370, in _raise_timeout\n",
" raise ReadTimeoutError(\n",
"urllib3.exceptions.ReadTimeoutError: HTTPConnectionPool(host='localhost', port=3333): Read timed out. (read timeout=5)\n",
"\n",
"During handling of the above exception, another exception occurred:\n",
"\n",
"Traceback (most recent call last):\n",
" File \"/Users/vince/Library/Caches/pypoetry/virtualenvs/litellm-7WKnDWGw-py3.12/lib/python3.12/site-packages/lunary/consumer.py\", line 59, in send_batch\n",
" response = requests.post(\n",
" ^^^^^^^^^^^^^^\n",
" File \"/Users/vince/Library/Caches/pypoetry/virtualenvs/litellm-7WKnDWGw-py3.12/lib/python3.12/site-packages/requests/api.py\", line 115, in post\n",
" return request(\"post\", url, data=data, json=json, **kwargs)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/Users/vince/Library/Caches/pypoetry/virtualenvs/litellm-7WKnDWGw-py3.12/lib/python3.12/site-packages/requests/api.py\", line 59, in request\n",
" return session.request(method=method, url=url, **kwargs)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/Users/vince/Library/Caches/pypoetry/virtualenvs/litellm-7WKnDWGw-py3.12/lib/python3.12/site-packages/requests/sessions.py\", line 589, in request\n",
" resp = self.send(prep, **send_kwargs)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/Users/vince/Library/Caches/pypoetry/virtualenvs/litellm-7WKnDWGw-py3.12/lib/python3.12/site-packages/requests/sessions.py\", line 703, in send\n",
" r = adapter.send(request, **kwargs)\n",
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
" File \"/Users/vince/Library/Caches/pypoetry/virtualenvs/litellm-7WKnDWGw-py3.12/lib/python3.12/site-packages/requests/adapters.py\", line 532, in send\n",
" raise ReadTimeout(e, request=request)\n",
"requests.exceptions.ReadTimeout: HTTPConnectionPool(host='localhost', port=3333): Read timed out. (read timeout=5)\n",
"\n",
"During handling of the above exception, another exception occurred:\n",
"\n",
"Traceback (most recent call last):\n",
" File \"/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12/logging/__init__.py\", line 1160, in emit\n",
" msg = self.format(record)\n",
" ^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12/logging/__init__.py\", line 999, in format\n",
" return fmt.format(record)\n",
" ^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12/logging/__init__.py\", line 703, in format\n",
" record.message = record.getMessage()\n",
" ^^^^^^^^^^^^^^^^^^^\n",
" File \"/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12/logging/__init__.py\", line 392, in getMessage\n",
" msg = msg % self.args\n",
" ~~~~^~~~~~~~~~~\n",
"TypeError: not all arguments converted during string formatting\n",
"Call stack:\n",
" File \"/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12/threading.py\", line 1030, in _bootstrap\n",
" self._bootstrap_inner()\n",
" File \"/opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12/threading.py\", line 1073, in _bootstrap_inner\n",
" self.run()\n",
" File \"/Users/vince/Library/Caches/pypoetry/virtualenvs/litellm-7WKnDWGw-py3.12/lib/python3.12/site-packages/lunary/consumer.py\", line 24, in run\n",
" self.send_batch()\n",
" File \"/Users/vince/Library/Caches/pypoetry/virtualenvs/litellm-7WKnDWGw-py3.12/lib/python3.12/site-packages/lunary/consumer.py\", line 73, in send_batch\n",
" logging.error(\"[Lunary] Error sending events\", e)\n",
"Message: '[Lunary] Error sending events'\n",
"Arguments: (ReadTimeout(ReadTimeoutError(\"HTTPConnectionPool(host='localhost', port=3333): Read timed out. (read timeout=5)\")),)\n"
]
}
],
"source": [
"# set langfuse as a callback, litellm will send the data to langfuse\n",
"litellm.success_callback = [\"lunary\"]\n",
"\n",
"# openai call\n",
"response = completion(\n",
" model=\"gpt-3.5-turbo\",\n",
" messages=[\n",
" {\"role\": \"user\", \"content\": \"Hi 👋 - i'm openai\"}\n",
" ]\n",
")\n",
"\n",
"print(response)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Using LiteLLM with Lunary Templates\n",
"\n",
"You can use LiteLLM seamlessly with Lunary templates to manage your prompts and completions.\n",
"\n",
"Assuming you have created a template \"test-template\" with a variable \"question\", you can use it like this:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "2PMSLc_FziJL",
"outputId": "1c37605e-b406-4ffc-aafd-e1983489c6be"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Choices(finish_reason='stop', index=0, message=Message(content='Hello! How can I assist you today?', role='assistant'))]ModelResponse(id='chatcmpl-8xIXegwpudg4YKnLB6pmpFGXqTHcH', choices=[Choices(finish_reason='stop', index=0, message=Message(content='Hello! How can I assist you today?', role='assistant'))], created=1709143318, model='gpt-4-0125-preview', object='chat.completion', system_fingerprint='fp_c8aa5a06d6', usage=Usage(completion_tokens=9, prompt_tokens=21, total_tokens=30))\n",
"\n",
"[Lunary] Add event: {\n",
" \"event\": \"start\",\n",
" \"type\": \"llm\",\n",
" \"name\": \"gpt-4-turbo-preview\",\n",
" \"runId\": \"3a5b698d-cb55-4b3b-ab6d-04d2b99e40cb\",\n",
" \"timestamp\": \"2024-02-28T18:01:56.746249+00:00\",\n",
" \"input\": [\n",
" {\n",
" \"role\": \"system\",\n",
" \"content\": \"You are an helpful assistant.\"\n",
" },\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": \"Hi! Hello!\"\n",
" }\n",
" ],\n",
" \"extra\": {\n",
" \"temperature\": 1,\n",
" \"max_tokens\": 100\n",
" },\n",
" \"runtime\": \"litellm\",\n",
" \"metadata\": {}\n",
"}\n",
"\n",
"\n",
"[Lunary] Add event: {\n",
" \"event\": \"end\",\n",
" \"type\": \"llm\",\n",
" \"runId\": \"3a5b698d-cb55-4b3b-ab6d-04d2b99e40cb\",\n",
" \"timestamp\": \"2024-02-28T18:01:58.741244+00:00\",\n",
" \"output\": {\n",
" \"role\": \"assistant\",\n",
" \"content\": \"Hello! How can I assist you today?\"\n",
" },\n",
" \"runtime\": \"litellm\",\n",
" \"tokensUsage\": {\n",
" \"completion\": 9,\n",
" \"prompt\": 21\n",
" }\n",
"}\n",
"\n",
"\n"
]
}
],
"source": [
"import lunary\n",
"from litellm import completion\n",
"\n",
"template = lunary.render_template(\"test-template\", {\"question\": \"Hello!\"})\n",
"\n",
"response = completion(**template)\n",
"\n",
"print(response)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}


@@ -33,7 +33,7 @@
- Call all models using the OpenAI format - `completion(model, messages)`
- Text responses will always be available at `['choices'][0]['message']['content']`
- **Error Handling** Using Model Fallbacks (if `GPT-4` fails, try `llama2`)
-- **Logging** - Log Requests, Responses and Errors to `Supabase`, `Posthog`, `Mixpanel`, `Sentry`, `LLMonitor`,`Athina`, `Helicone` (Any of the supported providers here: https://litellm.readthedocs.io/en/latest/advanced/
+- **Logging** - Log Requests, Responses and Errors to `Supabase`, `Posthog`, `Mixpanel`, `Sentry`, `Lunary`,`Athina`, `Helicone` (Any of the supported providers here: https://litellm.readthedocs.io/en/latest/advanced/
**Example: Logs sent to Supabase**
<img width="1015" alt="Screenshot 2023-08-11 at 4 02 46 PM" src="https://github.com/ishaan-jaff/proxy-server/assets/29436595/237557b8-ba09-4917-982c-8f3e1b2c8d08">


@@ -30,13 +30,15 @@ Email us @ krrish@berri.ai
Next Steps 👉 [Call all supported models - e.g. Claude-2, Llama2-70b, etc.](./proxy_api.md#supported-models)
More details 👉
-* [Completion() function details](./completion/)
-* [All supported models / providers on LiteLLM](./providers/)
-* [Build your own OpenAI proxy](https://github.com/BerriAI/liteLLM-proxy/tree/main)
+- [Completion() function details](./completion/)
+- [All supported models / providers on LiteLLM](./providers/)
+- [Build your own OpenAI proxy](https://github.com/BerriAI/liteLLM-proxy/tree/main)
## streaming
Same example from before. Just pass in `stream=True` in the completion args.
```python
from litellm import completion
@@ -56,8 +58,9 @@ print(response)
```
More details 👉
-* [streaming + async](./completion/stream.md)
-* [tutorial for streaming Llama2 on TogetherAI](./tutorials/TogetherAI_liteLLM.md)
+- [streaming + async](./completion/stream.md)
+- [tutorial for streaming Llama2 on TogetherAI](./tutorials/TogetherAI_liteLLM.md)
## exception handling
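The exception-handling snippet itself sits between these hunks; a minimal sketch of the pattern the heading and the `except OpenAIError` context line refer to (the invalid Anthropic key is only there to force an error, and the import path assumes `openai>=1.0.0`):

```python
import os
from openai import OpenAIError  # litellm maps provider errors to OpenAI exception types
from litellm import completion

os.environ["ANTHROPIC_API_KEY"] = "bad-key"  # deliberately invalid

try:
    completion(
        model="claude-instant-1",
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
    )
except OpenAIError as e:
    print(e)
```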
@@ -76,25 +79,28 @@ except OpenAIError as e:
```
## Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
-LiteLLM exposes pre defined callbacks to send data to Langfuse, LLMonitor, Helicone, Promptlayer, Traceloop, Slack
+LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack
```python
from litellm import completion
## set env variables for logging tools
+os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
-os.environ["LLMONITOR_APP_ID"] = "your-llmonitor-app-id"
os.environ["OPENAI_API_KEY"]
# set callbacks
-litellm.success_callback = ["langfuse", "llmonitor"] # log input/output to langfuse, llmonitor, supabase
+litellm.success_callback = ["lunary", "langfuse"] # log input/output to langfuse, lunary, supabase
#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
```
More details 👉
-* [exception mapping](./exception_mapping.md)
-* [retries + model fallbacks for completion()](./completion/reliable_completions.md)
-* [tutorial for model fallbacks with completion()](./tutorials/fallbacks.md)
+- [exception mapping](./exception_mapping.md)
+- [retries + model fallbacks for completion()](./completion/reliable_completions.md)
+- [tutorial for model fallbacks with completion()](./tutorials/fallbacks.md)


@@ -5,7 +5,6 @@ import TabItem from '@theme/TabItem';
https://github.com/BerriAI/litellm
## **Call 100+ LLMs using the same Input/Output Format**
- Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints
@@ -21,6 +20,7 @@ You can use litellm through either:
## LiteLLM Python SDK
### Basic usage
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_Getting_Started.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>
@@ -28,6 +28,7 @@ You can use litellm through either:
```shell
pip install litellm
```
<Tabs>
<TabItem value="openai" label="OpenAI">
@@ -120,7 +121,6 @@ response = completion(
</TabItem>
<TabItem value="ollama" label="Ollama">
```python
@@ -132,6 +132,7 @@ response = completion(
  api_base="http://localhost:11434"
)
```
</TabItem>
<TabItem value="or" label="Openrouter">
@@ -147,12 +148,14 @@ response = completion(
  messages = [{ "content": "Hello, how are you?","role": "user"}],
)
```
</TabItem>
</Tabs>
### Streaming
Set `stream=True` in the `completion` args.
<Tabs>
<TabItem value="openai" label="OpenAI">
@@ -250,7 +253,6 @@ response = completion(
</TabItem>
<TabItem value="ollama" label="Ollama">
```python
@@ -263,6 +265,7 @@ response = completion(
  stream=True,
)
```
</TabItem>
<TabItem value="or" label="Openrouter">
@@ -279,6 +282,7 @@ response = completion(
  stream=True,
)
```
</TabItem>
</Tabs>
@@ -300,19 +304,20 @@ except OpenAIError as e:
```
### Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
-LiteLLM exposes pre defined callbacks to send data to Langfuse, LLMonitor, Helicone, Promptlayer, Traceloop, Slack
+LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack
```python
from litellm import completion
## set env variables for logging tools
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
-os.environ["LLMONITOR_APP_ID"] = "your-llmonitor-app-id"
+os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
os.environ["OPENAI_API_KEY"]
# set callbacks
-litellm.success_callback = ["langfuse", "llmonitor"] # log input/output to langfuse, llmonitor, supabase
+litellm.success_callback = ["lunary", "langfuse"] # log input/output to lunary, langfuse, supabase
#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
@@ -358,6 +363,7 @@ Track spend across multiple projects/people
![ui_3](https://github.com/BerriAI/litellm/assets/29436595/47c97d5e-b9be-4839-b28c-43d7f4f10033)
The proxy provides:
1. [Hooks for auth](https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth)
2. [Hooks for logging](https://docs.litellm.ai/docs/proxy/logging#step-1---create-your-custom-litellm-callback-class)
3. [Cost tracking](https://docs.litellm.ai/docs/proxy/virtual_keys#tracking-spend)
@@ -372,6 +378,7 @@ pip install 'litellm[proxy]'
```
#### Step 1: Start litellm proxy
```shell
$ litellm --model huggingface/bigcode/starcoder
@@ -379,6 +386,7 @@ $ litellm --model huggingface/bigcode/starcoder
```
#### Step 2: Make ChatCompletions Request to Proxy
```python
import openai # openai v1.0.0+
client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:4000") # set proxy to base_url
@@ -394,6 +402,7 @@ print(response)
```
## More details
-* [exception mapping](./exception_mapping.md)
-* [retries + model fallbacks for completion()](./completion/reliable_completions.md)
-* [proxy virtual keys & spend management](./tutorials/fallbacks.md)
+- [exception mapping](./exception_mapping.md)
+- [retries + model fallbacks for completion()](./completion/reliable_completions.md)
+- [proxy virtual keys & spend management](./tutorials/fallbacks.md)


@@ -7,7 +7,7 @@ liteLLM provides `input_callbacks`, `success_callbacks` and `failure_callbacks`,
liteLLM supports:
- [Custom Callback Functions](https://docs.litellm.ai/docs/observability/custom_callback)
-- [LLMonitor](https://llmonitor.com/docs)
+- [Lunary](https://lunary.ai/docs)
- [Helicone](https://docs.helicone.ai/introduction)
- [Traceloop](https://traceloop.com/docs)
- [Athina](https://docs.athina.ai/)
@@ -22,15 +22,15 @@ from litellm import completion
# set callbacks
litellm.input_callback=["sentry"] # for sentry breadcrumbing - logs the input being sent to the api
-litellm.success_callback=["posthog", "helicone", "llmonitor", "athina"]
-litellm.failure_callback=["sentry", "llmonitor"]
+litellm.success_callback=["posthog", "helicone", "lunary", "athina"]
+litellm.failure_callback=["sentry", "lunary"]
## set env variables
os.environ['SENTRY_DSN'], os.environ['SENTRY_API_TRACE_RATE']= ""
os.environ['POSTHOG_API_KEY'], os.environ['POSTHOG_API_URL'] = "api-key", "api-url"
os.environ["HELICONE_API_KEY"] = ""
os.environ["TRACELOOP_API_KEY"] = ""
-os.environ["LLMONITOR_APP_ID"] = ""
+os.environ["LUNARY_PUBLIC_KEY"] = ""
os.environ["ATHINA_API_KEY"] = ""
response = completion(model="gpt-3.5-turbo", messages=messages)


@ -1,65 +0,0 @@
# LLMonitor Tutorial
[LLMonitor](https://llmonitor.com/) is an open-source observability platform that provides cost tracking, user tracking and powerful agent tracing.
<video controls width='900' >
<source src='https://llmonitor.com/videos/demo-annotated.mp4'/>
</video>
## Use LLMonitor to log requests across all LLM Providers (OpenAI, Azure, Anthropic, Cohere, Replicate, PaLM)
liteLLM provides `callbacks`, making it easy for you to log data depending on the status of your responses.
:::info
We want to learn how we can make the callbacks better! Meet the [founders](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) or
join our [discord](https://discord.gg/wuPM9dRgDw)
:::
### Using Callbacks
First, sign up to get an app ID on the [LLMonitor dashboard](https://llmonitor.com).
Use just 2 lines of code, to instantly log your responses **across all providers** with llmonitor:
```python
litellm.success_callback = ["llmonitor"]
litellm.failure_callback = ["llmonitor"]
```
Complete code
```python
from litellm import completion
## set env variables
os.environ["LLMONITOR_APP_ID"] = "your-llmonitor-app-id"
# Optional: os.environ["LLMONITOR_API_URL"] = "self-hosting-url"
os.environ["OPENAI_API_KEY"], os.environ["COHERE_API_KEY"] = "", ""
# set callbacks
litellm.success_callback = ["llmonitor"]
litellm.failure_callback = ["llmonitor"]
#openai call
response = completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
user="ishaan_litellm"
)
#cohere call
response = completion(
model="command-nightly",
messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}],
user="ishaan_litellm"
)
```
## Support & Talk to Founders
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
- Meet the LLMonitor team on [Discord](http://discord.com/invite/8PafSG58kK) or via [email](mailto:vince@llmonitor.com).


@@ -0,0 +1,82 @@
# Lunary - Logging and tracing LLM input/output
[Lunary](https://lunary.ai/) is an open-source AI developer platform providing observability, prompt management, and evaluation tools for AI developers.
<video controls width='900' >
<source src='https://lunary.ai/videos/demo-annotated.mp4'/>
</video>
## Use Lunary to log requests across all LLM Providers (OpenAI, Azure, Anthropic, Cohere, Replicate, PaLM)
liteLLM provides `callbacks`, making it easy for you to log data depending on the status of your responses.
:::info
We want to learn how we can make the callbacks better! Meet the [founders](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) or
join our [discord](https://discord.gg/wuPM9dRgDw)
:::
### Using Callbacks
First, sign up to get a public key on the [Lunary dashboard](https://lunary.ai).
Use just 2 lines of code to instantly log your responses **across all providers** with Lunary:
```python
litellm.success_callback = ["lunary"]
litellm.failure_callback = ["lunary"]
```
Complete code
```python
from litellm import completion
## set env variables
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
os.environ["OPENAI_API_KEY"] = ""
# set callbacks
litellm.success_callback = ["lunary"]
litellm.failure_callback = ["lunary"]
#openai call
response = completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
user="ishaan_litellm"
)
```
## Templates
You can use Lunary to manage prompt templates and use them across all your LLM providers.
Make sure to have `lunary` installed:
```bash
pip install lunary
```
Then, use the following code to pull templates from Lunary:
```python
from litellm import completion
import lunary
template = lunary.render_template("template-slug", {
"name": "John", # Inject variables
})
litellm.success_callback = ["lunary"]
result = completion(**template)
```
## Support & Talk to Founders
- Meet the Lunary team via [email](mailto:hello@lunary.ai).
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai


@@ -175,6 +175,15 @@ print(response)
## Usage - Function Calling
+:::info
+Claude returns its output as an XML tree. [Here is how we translate it](https://github.com/BerriAI/litellm/blob/49642a5b00a53b1babc1a753426a8afcac85dbbe/litellm/llms/prompt_templates/factory.py#L734).
+You can see the raw response via `response._hidden_params["original_response"]`.
+Claude sometimes hallucinates the structure, e.g. returning the list param `value` as `<value>\n<item>apple</item>\n<item>banana</item>\n</value>` or `<value>\n<list>\n<item>apple</item>\n<item>banana</item>\n</list>\n</value>`.
+:::
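A minimal sketch of inspecting that raw response (the tool definition and model name below are placeholders, not taken from this page):

```python
import os
from litellm import completion

os.environ["ANTHROPIC_API_KEY"] = "sk-ant-..."  # placeholder key

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string"}},
                "required": ["location"],
            },
        },
    }
]

response = completion(
    model="claude-3-opus-20240229",
    messages=[{"role": "user", "content": "What's the weather like in Boston?"}],
    tools=tools,
)

# the untranslated XML Claude returned, before LiteLLM converts it to the OpenAI tool-call format
print(response._hidden_params["original_response"])
```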
```python
from litellm import completion


@@ -146,6 +146,15 @@ print(response)
## Usage - Function Calling
+:::info
+Claude returns its output as an XML tree. [Here is how we translate it](https://github.com/BerriAI/litellm/blob/49642a5b00a53b1babc1a753426a8afcac85dbbe/litellm/llms/prompt_templates/factory.py#L734).
+You can see the raw response via `response._hidden_params["original_response"]`.
+Claude sometimes hallucinates the structure, e.g. returning the list param `value` as `<value>\n<item>apple</item>\n<item>banana</item>\n</value>` or `<value>\n<list>\n<item>apple</item>\n<item>banana</item>\n</list>\n</value>`.
+:::
```python
from litellm import completion


@@ -176,8 +176,7 @@ general_settings:
  master_key: sk-1234
litellm_settings:
-  max_budget: 10 # global budget for proxy
-  max_user_budget: 0.0001 # budget for 'user' passed to /chat/completions
+  max_end_user_budget: 0.0001 # budget for 'user' passed to /chat/completions
```
2. Make a /chat/completions call, pass 'user' - First call Works
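A request for step 2 that passes `user` might look like this sketch (the proxy URL, key, and user id are placeholders consistent with the config above):

```python
import openai  # openai v1.0.0+

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# 'user' identifies the end user whose budget the proxy enforces
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
    user="ishaan3",
)
print(response)
```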


@@ -22,10 +22,10 @@ const sidebars = {
      type: "category",
      label: "💥 OpenAI Proxy Server",
      link: {
-        type: 'generated-index',
-        title: '💥 OpenAI Proxy Server',
+        type: "generated-index",
+        title: "💥 OpenAI Proxy Server",
        description: `Proxy Server to call 100+ LLMs in a unified interface & track spend, set budgets per virtual key/user`,
-        slug: '/simple_proxy',
+        slug: "/simple_proxy",
      },
      items: [
        "proxy/quick_start",
@@ -33,9 +33,9 @@ const sidebars = {
        "proxy/prod",
        "proxy/configs",
        {
-          type: 'link',
-          label: '📖 All Endpoints',
-          href: 'https://litellm-api.up.railway.app/',
+          type: "link",
+          label: "📖 All Endpoints",
+          href: "https://litellm-api.up.railway.app/",
        },
        "proxy/enterprise",
        "proxy/user_keys",
@@ -46,12 +46,9 @@ const sidebars = {
        "proxy/cost_tracking",
        "proxy/token_auth",
        {
-          "type": "category",
-          "label": "🔥 Load Balancing",
-          "items": [
-            "proxy/load_balancing",
-            "proxy/reliability",
-          ]
+          type: "category",
+          label: "🔥 Load Balancing",
+          items: ["proxy/load_balancing", "proxy/reliability"],
        },
        "proxy/model_management",
        "proxy/health",
@@ -60,13 +57,9 @@ const sidebars = {
        "proxy/prompt_injection",
        "proxy/caching",
        {
-          "type": "category",
-          "label": "Logging, Alerting",
-          "items": [
-            "proxy/logging",
-            "proxy/alerting",
-            "proxy/streaming_logging",
-          ]
+          type: "category",
+          label: "Logging, Alerting",
+          items: ["proxy/logging", "proxy/alerting", "proxy/streaming_logging"],
        },
        "proxy/grafana_metrics",
        "proxy/call_hooks",
@@ -78,10 +71,10 @@ const sidebars = {
      type: "category",
      label: "Completion()",
      link: {
-        type: 'generated-index',
-        title: 'Completion()',
-        description: 'Details on the completion() function',
-        slug: '/completion',
+        type: "generated-index",
+        title: "Completion()",
+        description: "Details on the completion() function",
+        slug: "/completion",
      },
      items: [
        "completion/input",
@@ -112,10 +105,11 @@ const sidebars = {
      type: "category",
      label: "Supported Models & Providers",
      link: {
-        type: 'generated-index',
-        title: 'Providers',
-        description: 'Learn how to deploy + call models from different providers on LiteLLM',
-        slug: '/providers',
+        type: "generated-index",
+        title: "Providers",
+        description:
+          "Learn how to deploy + call models from different providers on LiteLLM",
+        slug: "/providers",
      },
      items: [
        "providers/openai",
@@ -150,7 +144,7 @@ const sidebars = {
        "providers/openrouter",
        "providers/custom_openai_proxy",
        "providers/petals",
-      ]
+      ],
    },
    "proxy/custom_pricing",
    "routing",
@@ -165,9 +159,10 @@ const sidebars = {
      type: "category",
      label: "Logging & Observability",
      items: [
-        'debugging/local_debugging',
+        "debugging/local_debugging",
        "observability/callbacks",
        "observability/custom_callback",
+        "observability/lunary_integration",
        "observability/langfuse_integration",
        "observability/sentry",
        "observability/promptlayer_integration",
@@ -176,7 +171,6 @@ const sidebars = {
        "observability/slack_integration",
        "observability/traceloop_integration",
        "observability/athina_integration",
-        "observability/llmonitor_integration",
        "observability/helicone_integration",
        "observability/supabase_integration",
        `observability/telemetry`,
@@ -184,19 +178,19 @@ const sidebars = {
    },
    "caching/redis_cache",
    {
-      type: 'category',
-      label: 'Tutorials',
+      type: "category",
+      label: "Tutorials",
      items: [
        'tutorials/azure_openai',
        'tutorials/instructor',
        'tutorials/oobabooga',
        "tutorials/gradio_integration",
-        'tutorials/huggingface_codellama',
-        'tutorials/huggingface_tutorial',
-        'tutorials/TogetherAI_liteLLM',
-        'tutorials/finetuned_chat_gpt',
-        'tutorials/sagemaker_llms',
-        'tutorials/text_completion',
+        "tutorials/huggingface_codellama",
+        "tutorials/huggingface_tutorial",
+        "tutorials/TogetherAI_liteLLM",
+        "tutorials/finetuned_chat_gpt",
+        "tutorials/sagemaker_llms",
+        "tutorials/text_completion",
        "tutorials/first_playground",
        "tutorials/model_fallbacks",
      ],
@@ -204,24 +198,23 @@ const sidebars = {
    {
      type: "category",
      label: "LangChain, LlamaIndex Integration",
-      items: [
-        "langchain/langchain"
-      ],
+      items: ["langchain/langchain"],
    },
    {
-      type: 'category',
-      label: 'Extras',
+      type: "category",
+      label: "Extras",
      items: [
-        'extras/contributing',
+        "extras/contributing",
        "proxy_server",
        {
          type: "category",
          label: "❤️ 🚅 Projects built on LiteLLM",
          link: {
-            type: 'generated-index',
-            title: 'Projects built on LiteLLM',
-            description: 'Learn how to deploy + call models from different providers on LiteLLM',
-            slug: '/project',
+            type: "generated-index",
+            title: "Projects built on LiteLLM",
+            description:
+              "Learn how to deploy + call models from different providers on LiteLLM",
+            slug: "/project",
          },
          items: [
            "projects/Docq.AI",
@@ -237,7 +230,7 @@ const sidebars = {
        "projects/GPT Migrate",
        "projects/YiVal",
        "projects/LiteLLM Proxy",
-      ]
+      ],
      },
    ],
  },


@ -5,7 +5,6 @@ import TabItem from '@theme/TabItem';
https://github.com/BerriAI/litellm https://github.com/BerriAI/litellm
## **Call 100+ LLMs using the same Input/Output Format** ## **Call 100+ LLMs using the same Input/Output Format**
- Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints - Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints
@ -14,6 +13,7 @@ https://github.com/BerriAI/litellm
- Track spend & set budgets per project [OpenAI Proxy Server](https://docs.litellm.ai/docs/simple_proxy) - Track spend & set budgets per project [OpenAI Proxy Server](https://docs.litellm.ai/docs/simple_proxy)
## Basic usage ## Basic usage
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_Getting_Started.ipynb"> <a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_Getting_Started.ipynb">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a> </a>
@ -21,6 +21,7 @@ https://github.com/BerriAI/litellm
```shell ```shell
pip install litellm pip install litellm
``` ```
<Tabs> <Tabs>
<TabItem value="openai" label="OpenAI"> <TabItem value="openai" label="OpenAI">
@ -113,7 +114,6 @@ response = completion(
</TabItem> </TabItem>
<TabItem value="ollama" label="Ollama"> <TabItem value="ollama" label="Ollama">
```python ```python
@ -125,6 +125,7 @@ response = completion(
api_base="http://localhost:11434" api_base="http://localhost:11434"
) )
``` ```
</TabItem> </TabItem>
<TabItem value="or" label="Openrouter"> <TabItem value="or" label="Openrouter">
@ -140,11 +141,13 @@ response = completion(
messages = [{ "content": "Hello, how are you?","role": "user"}], messages = [{ "content": "Hello, how are you?","role": "user"}],
) )
``` ```
</TabItem> </TabItem>
</Tabs> </Tabs>
## Streaming ## Streaming
Set `stream=True` in the `completion` args. Set `stream=True` in the `completion` args.
<Tabs> <Tabs>
<TabItem value="openai" label="OpenAI"> <TabItem value="openai" label="OpenAI">
@ -243,7 +246,6 @@ response = completion(
</TabItem> </TabItem>
<TabItem value="ollama" label="Ollama"> <TabItem value="ollama" label="Ollama">
```python ```python
@ -256,6 +258,7 @@ response = completion(
stream=True, stream=True,
) )
``` ```
</TabItem> </TabItem>
<TabItem value="or" label="Openrouter"> <TabItem value="or" label="Openrouter">
@ -272,6 +275,7 @@ response = completion(
stream=True, stream=True,
) )
``` ```
</TabItem> </TabItem>
</Tabs> </Tabs>
@ -293,25 +297,28 @@ except OpenAIError as e:
``` ```
## Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks)) ## Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
LiteLLM exposes pre defined callbacks to send data to Langfuse, LLMonitor, Helicone, Promptlayer, Traceloop, Slack
LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack
```python ```python
from litellm import completion from litellm import completion
## set env variables for logging tools ## set env variables for logging tools
os.environ["LANGFUSE_PUBLIC_KEY"] = "" os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = "" os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["LLMONITOR_APP_ID"] = "your-llmonitor-app-id" os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
os.environ["OPENAI_API_KEY"] os.environ["OPENAI_API_KEY"]
# set callbacks # set callbacks
litellm.success_callback = ["langfuse", "llmonitor"] # log input/output to langfuse, llmonitor, supabase litellm.success_callback = ["langfuse", "lunary"] # log input/output to lunary, langfuse, supabase
#openai call #openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}]) response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
``` ```
## Track Costs, Usage, Latency for streaming ## Track Costs, Usage, Latency for streaming
Use a callback function for this - more info on custom callbacks: https://docs.litellm.ai/docs/observability/custom_callback Use a callback function for this - more info on custom callbacks: https://docs.litellm.ai/docs/observability/custom_callback
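The snippet that follows is truncated by the diff context; as a stand-in, here is a minimal sketch of such a callback, assuming litellm's standard custom-callback signature and the `response_cost` field it populates:

```python
import litellm

# Assumed custom success-callback signature: (kwargs, completion_response, start_time, end_time)
def track_cost_callback(kwargs, completion_response, start_time, end_time):
    try:
        # For streaming, this is populated once the full stream has been consumed
        response_cost = kwargs.get("response_cost", 0)
        print("streaming response_cost", response_cost)
    except Exception:
        pass

litellm.success_callback = [track_cost_callback]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
    stream=True,
)
for chunk in response:
    pass  # consume the stream so the callback fires with the final cost
```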
```python ```python
@ -351,6 +358,7 @@ Track spend across multiple projects/people
![ui_3](https://github.com/BerriAI/litellm/assets/29436595/47c97d5e-b9be-4839-b28c-43d7f4f10033) ![ui_3](https://github.com/BerriAI/litellm/assets/29436595/47c97d5e-b9be-4839-b28c-43d7f4f10033)
The proxy provides: The proxy provides:
1. [Hooks for auth](https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth) 1. [Hooks for auth](https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth)
2. [Hooks for logging](https://docs.litellm.ai/docs/proxy/logging#step-1---create-your-custom-litellm-callback-class) 2. [Hooks for logging](https://docs.litellm.ai/docs/proxy/logging#step-1---create-your-custom-litellm-callback-class)
3. [Cost tracking](https://docs.litellm.ai/docs/proxy/virtual_keys#tracking-spend) 3. [Cost tracking](https://docs.litellm.ai/docs/proxy/virtual_keys#tracking-spend)
@ -365,6 +373,7 @@ pip install 'litellm[proxy]'
``` ```
#### Step 1: Start litellm proxy #### Step 1: Start litellm proxy
```shell ```shell
$ litellm --model huggingface/bigcode/starcoder $ litellm --model huggingface/bigcode/starcoder
@ -372,6 +381,7 @@ $ litellm --model huggingface/bigcode/starcoder
``` ```
#### Step 2: Make ChatCompletions Request to Proxy #### Step 2: Make ChatCompletions Request to Proxy
```python ```python
import openai # openai v1.0.0+ import openai # openai v1.0.0+
client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:8000") # set proxy to base_url client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:8000") # set proxy to base_url
@ -387,6 +397,7 @@ print(response)
``` ```
## More details ## More details
* [exception mapping](./exception_mapping.md)
* [retries + model fallbacks for completion()](./completion/reliable_completions.md) - [exception mapping](./exception_mapping.md)
* [proxy virtual keys & spend management](./tutorials/fallbacks.md) - [retries + model fallbacks for completion()](./completion/reliable_completions.md)
- [proxy virtual keys & spend management](./tutorials/fallbacks.md)

@ -6,7 +6,7 @@ liteLLM provides `success_callbacks` and `failure_callbacks`, making it easy for
liteLLM supports: liteLLM supports:
- [LLMonitor](https://llmonitor.com/docs) - [Lunary](https://lunary.ai/docs)
- [Helicone](https://docs.helicone.ai/introduction) - [Helicone](https://docs.helicone.ai/introduction)
- [Sentry](https://docs.sentry.io/platforms/python/) - [Sentry](https://docs.sentry.io/platforms/python/)
- [PostHog](https://posthog.com/docs/libraries/python) - [PostHog](https://posthog.com/docs/libraries/python)
@ -18,8 +18,8 @@ liteLLM supports:
from litellm import completion from litellm import completion
# set callbacks # set callbacks
litellm.success_callback=["posthog", "helicone", "llmonitor"] litellm.success_callback=["posthog", "helicone", "lunary"]
litellm.failure_callback=["sentry", "llmonitor"] litellm.failure_callback=["sentry", "lunary"]
## set env variables ## set env variables
os.environ['SENTRY_DSN'], os.environ['SENTRY_API_TRACE_RATE']= "" os.environ['SENTRY_DSN'], os.environ['SENTRY_API_TRACE_RATE']= ""
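For completeness, a self-contained version of the snippet above as it reads after this commit's rename; the PostHog/Helicone environment-variable names are assumptions, while `LUNARY_PUBLIC_KEY` matches the README change elsewhere in this commit:

```python
import os
import litellm
from litellm import completion

# set callbacks (llmonitor -> lunary after this commit)
litellm.success_callback = ["posthog", "helicone", "lunary"]
litellm.failure_callback = ["sentry", "lunary"]

## set env variables
os.environ["SENTRY_DSN"], os.environ["SENTRY_API_TRACE_RATE"] = "", ""
os.environ["POSTHOG_API_KEY"] = ""   # assumed variable name
os.environ["HELICONE_API_KEY"] = ""  # assumed variable name
os.environ["LUNARY_PUBLIC_KEY"] = ""
os.environ["OPENAI_API_KEY"] = ""

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
)
```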

@ -174,6 +174,7 @@ upperbound_key_generate_params: Optional[Dict] = None
default_user_params: Optional[Dict] = None default_user_params: Optional[Dict] = None
default_team_settings: Optional[List] = None default_team_settings: Optional[List] = None
max_user_budget: Optional[float] = None max_user_budget: Optional[float] = None
max_end_user_budget: Optional[float] = None
#### RELIABILITY #### #### RELIABILITY ####
request_timeout: Optional[float] = 6000 request_timeout: Optional[float] = 6000
num_retries: Optional[int] = None # per model endpoint num_retries: Optional[int] = None # per model endpoint
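A small illustrative sketch of the new setting (the value is made up); as used elsewhere in this commit, it caps spend for end users passed via the `user` field, separately from `max_user_budget` for internal users:

```python
import litellm

# Hypothetical value; applied as the default budget for new end-user records on the proxy
litellm.max_end_user_budget = 0.01  # USD
```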

@ -1,127 +0,0 @@
#### What this does ####
# On success + failure, log events to aispend.io
import datetime
import traceback
import dotenv
import os
import requests
dotenv.load_dotenv() # Loading env variables using dotenv
# convert to {completion: xx, tokens: xx}
def parse_usage(usage):
return {
"completion": usage["completion_tokens"] if "completion_tokens" in usage else 0,
"prompt": usage["prompt_tokens"] if "prompt_tokens" in usage else 0,
}
def parse_messages(input):
if input is None:
return None
def clean_message(message):
# if is strin, return as is
if isinstance(message, str):
return message
if "message" in message:
return clean_message(message["message"])
text = message["content"]
if text == None:
text = message.get("function_call", None)
return {
"role": message["role"],
"text": text,
}
if isinstance(input, list):
if len(input) == 1:
return clean_message(input[0])
else:
return [clean_message(msg) for msg in input]
else:
return clean_message(input)
class LLMonitorLogger:
# Class variables or attributes
def __init__(self):
# Instance variables
self.api_url = os.getenv("LLMONITOR_API_URL") or "https://app.llmonitor.com"
self.app_id = os.getenv("LLMONITOR_APP_ID")
def log_event(
self,
type,
event,
run_id,
model,
print_verbose,
input=None,
user_id=None,
response_obj=None,
start_time=datetime.datetime.now(),
end_time=datetime.datetime.now(),
error=None,
):
# Method definition
try:
print_verbose(f"LLMonitor Logging - Logging request for model {model}")
if response_obj:
usage = (
parse_usage(response_obj["usage"])
if "usage" in response_obj
else None
)
output = response_obj["choices"] if "choices" in response_obj else None
else:
usage = None
output = None
if error:
error_obj = {"stack": error}
else:
error_obj = None
data = [
{
"type": type,
"name": model,
"runId": run_id,
"app": self.app_id,
"event": "start",
"timestamp": start_time.isoformat(),
"userId": user_id,
"input": parse_messages(input),
},
{
"type": type,
"runId": run_id,
"app": self.app_id,
"event": event,
"error": error_obj,
"timestamp": end_time.isoformat(),
"userId": user_id,
"output": parse_messages(output),
"tokensUsage": usage,
},
]
print_verbose(f"LLMonitor Logging - final data object: {data}")
response = requests.post(
self.api_url + "/api/report",
headers={"Content-Type": "application/json"},
json={"events": data},
)
print_verbose(f"LLMonitor Logging - response: {response}")
except:
# traceback.print_exc()
print_verbose(f"LLMonitor Logging Error - {traceback.format_exc()}")
pass

@ -0,0 +1,157 @@
#### What this does ####
# On success + failure, log events to lunary.ai
from datetime import datetime, timezone
import traceback
import dotenv
import importlib
from pkg_resources import parse_version
import sys
dotenv.load_dotenv()
# convert to {completion: xx, tokens: xx}
def parse_usage(usage):
return {
"completion": usage["completion_tokens"] if "completion_tokens" in usage else 0,
"prompt": usage["prompt_tokens"] if "prompt_tokens" in usage else 0,
}
def parse_messages(input):
if input is None:
return None
def clean_message(message):
# if it is a string, return as is
if isinstance(message, str):
return message
if "message" in message:
return clean_message(message["message"])
serialized = {
"role": message.get("role"),
"content": message.get("content"),
}
# Only add tool_calls and function_call to serialized if they are set
if message.get("tool_calls"):
serialized["tool_calls"] = message.get("tool_calls")
if message.get("function_call"):
serialized["function_call"] = message.get("function_call")
return serialized
if isinstance(input, list):
if len(input) == 1:
return clean_message(input[0])
else:
return [clean_message(msg) for msg in input]
else:
return clean_message(input)
class LunaryLogger:
# Class variables or attributes
def __init__(self):
try:
import lunary
version = importlib.metadata.version("lunary")
# if version < 0.1.43 then raise ImportError
if parse_version(version) < parse_version("0.1.43"):
print("Lunary version outdated. Required: >= 0.1.43. Upgrade via 'pip install lunary --upgrade'")
raise ImportError
self.lunary_client = lunary
except ImportError:
print("Lunary not installed. Please install it using 'pip install lunary'")
raise ImportError
def log_event(
self,
kwargs,
type,
event,
run_id,
model,
print_verbose,
extra=None,
input=None,
user_id=None,
response_obj=None,
start_time=datetime.now(timezone.utc),
end_time=datetime.now(timezone.utc),
error=None,
):
# Method definition
try:
print_verbose(f"Lunary Logging - Logging request for model {model}")
litellm_params = kwargs.get("litellm_params", {})
metadata = (
litellm_params.get("metadata", {}) or {}
)
tags = litellm_params.pop("tags", None) or []
if extra:
extra.pop("extra_body", None)
extra.pop("user", None)
template_id = extra.pop("extra_headers", {}).get("Template-Id", None)
# keep only serializable types
for param, value in extra.items():
if not isinstance(value, (str, int, bool, float)):
try:
extra[param] = str(value)
except:
pass
if response_obj:
usage = (
parse_usage(response_obj["usage"])
if "usage" in response_obj
else None
)
output = response_obj["choices"] if "choices" in response_obj else None
else:
usage = None
output = None
if error:
error_obj = {"stack": error}
else:
error_obj = None
self.lunary_client.track_event(
type,
"start",
run_id,
user_id=user_id,
name=model,
input=parse_messages(input),
timestamp=start_time.astimezone(timezone.utc).isoformat(),
template_id=template_id,
metadata=metadata,
runtime="litellm",
tags=tags,
extra=extra,
)
self.lunary_client.track_event(
type,
event,
run_id,
timestamp=end_time.astimezone(timezone.utc).isoformat(),
runtime="litellm",
error=error_obj,
output=parse_messages(output),
token_usage=usage
)
except:
# traceback.print_exc()
print_verbose(f"Lunary Logging Error - {traceback.format_exc()}")
pass
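For reference, a hedged sketch of driving the new logger directly (inside LiteLLM it is normally reached via `litellm.success_callback = ["lunary"]` instead); it assumes the `lunary` package is installed and `LUNARY_PUBLIC_KEY` is set:

```python
import uuid
from datetime import datetime, timezone

from litellm.integrations.lunary import LunaryLogger

logger = LunaryLogger()  # raises ImportError if the lunary SDK is missing or outdated
logger.log_event(
    kwargs={"litellm_params": {"metadata": {"project": "demo"}}},
    type="llm",
    event="end",
    run_id=str(uuid.uuid4()),
    model="gpt-3.5-turbo",
    print_verbose=print,
    extra={"temperature": 0.2, "max_tokens": 10},  # stands in for optional_params in real calls
    input=[{"role": "user", "content": "Hello"}],
    user_id="demo-user",
    response_obj={
        "usage": {"prompt_tokens": 5, "completion_tokens": 7},
        "choices": [{"message": {"role": "assistant", "content": "Hi there!"}}],
    },
    start_time=datetime.now(timezone.utc),
    end_time=datetime.now(timezone.utc),
)
```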

@ -3,7 +3,7 @@ import json
from enum import Enum from enum import Enum
import requests, copy import requests, copy
import time, uuid import time, uuid
from typing import Callable, Optional from typing import Callable, Optional, List
from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
import litellm import litellm
from .prompt_templates.factory import ( from .prompt_templates.factory import (
@ -118,6 +118,7 @@ def completion(
): ):
headers = validate_environment(api_key, headers) headers = validate_environment(api_key, headers)
_is_function_call = False _is_function_call = False
json_schemas: dict = {}
messages = copy.deepcopy(messages) messages = copy.deepcopy(messages)
optional_params = copy.deepcopy(optional_params) optional_params = copy.deepcopy(optional_params)
if model in custom_prompt_dict: if model in custom_prompt_dict:
@ -161,6 +162,10 @@ def completion(
## Handle Tool Calling ## Handle Tool Calling
if "tools" in optional_params: if "tools" in optional_params:
_is_function_call = True _is_function_call = True
for tool in optional_params["tools"]:
json_schemas[tool["function"]["name"]] = tool["function"].get(
"parameters", None
)
tool_calling_system_prompt = construct_tool_use_system_prompt( tool_calling_system_prompt = construct_tool_use_system_prompt(
tools=optional_params["tools"] tools=optional_params["tools"]
) )
@ -248,7 +253,12 @@ def completion(
0 0
].strip() ].strip()
function_arguments_str = f"<invoke>{function_arguments_str}</invoke>" function_arguments_str = f"<invoke>{function_arguments_str}</invoke>"
function_arguments = parse_xml_params(function_arguments_str) function_arguments = parse_xml_params(
function_arguments_str,
json_schema=json_schemas.get(
function_name, None
), # check if we have a json schema for this function name
)
_message = litellm.Message( _message = litellm.Message(
tool_calls=[ tool_calls=[
{ {
@ -263,6 +273,9 @@ def completion(
content=None, content=None,
) )
model_response.choices[0].message = _message # type: ignore model_response.choices[0].message = _message # type: ignore
model_response._hidden_params["original_response"] = (
text_content # allow user to access raw anthropic tool calling response
)
else: else:
model_response.choices[0].message.content = text_content # type: ignore model_response.choices[0].message.content = text_content # type: ignore
model_response.choices[0].finish_reason = map_finish_reason( model_response.choices[0].finish_reason = map_finish_reason(
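A hedged sketch of what the schema-aware parsing and the new `original_response` hidden param look like from the caller's side (model name and tool definition are illustrative; requires `ANTHROPIC_API_KEY`):

```python
import litellm

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "City and state, e.g. Boston, MA"},
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }
]

response = litellm.completion(
    model="claude-3-opus-20240229",
    messages=[{"role": "user", "content": "What's the weather in Boston?"}],
    tools=tools,
)

# Tool arguments are now parsed against the tool's JSON schema (array params stay lists, etc.)
print(response.choices[0].message.tool_calls)
# Raw Anthropic tool-calling text, surfaced by the new _hidden_params entry above
print(response._hidden_params["original_response"])
```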

@ -691,6 +691,7 @@ def completion(
): ):
exception_mapping_worked = False exception_mapping_worked = False
_is_function_call = False _is_function_call = False
json_schemas: dict = {}
try: try:
# pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
aws_secret_access_key = optional_params.pop("aws_secret_access_key", None) aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
@ -757,6 +758,10 @@ def completion(
## Handle Tool Calling ## Handle Tool Calling
if "tools" in inference_params: if "tools" in inference_params:
_is_function_call = True _is_function_call = True
for tool in inference_params["tools"]:
json_schemas[tool["function"]["name"]] = tool["function"].get(
"parameters", None
)
tool_calling_system_prompt = construct_tool_use_system_prompt( tool_calling_system_prompt = construct_tool_use_system_prompt(
tools=inference_params["tools"] tools=inference_params["tools"]
) )
@ -943,7 +948,12 @@ def completion(
function_arguments_str = ( function_arguments_str = (
f"<invoke>{function_arguments_str}</invoke>" f"<invoke>{function_arguments_str}</invoke>"
) )
function_arguments = parse_xml_params(function_arguments_str) function_arguments = parse_xml_params(
function_arguments_str,
json_schema=json_schemas.get(
function_name, None
), # check if we have a json schema for this function name
)
_message = litellm.Message( _message = litellm.Message(
tool_calls=[ tool_calls=[
{ {
@ -958,6 +968,9 @@ def completion(
content=None, content=None,
) )
model_response.choices[0].message = _message # type: ignore model_response.choices[0].message = _message # type: ignore
model_response._hidden_params["original_response"] = (
outputText # allow user to access raw anthropic tool calling response
)
if _is_function_call == True and stream is not None and stream == True: if _is_function_call == True and stream is not None and stream == True:
print_verbose( print_verbose(
f"INSIDE BEDROCK STREAMING TOOL CALLING CONDITION BLOCK" f"INSIDE BEDROCK STREAMING TOOL CALLING CONDITION BLOCK"

@ -731,18 +731,53 @@ def contains_tag(tag: str, string: str) -> bool:
return bool(re.search(f"<{tag}>(.+?)</{tag}>", string, re.DOTALL)) return bool(re.search(f"<{tag}>(.+?)</{tag}>", string, re.DOTALL))
def parse_xml_params(xml_content): def parse_xml_params(xml_content, json_schema: Optional[dict] = None):
"""
Compare the xml output to the json schema
check if a value is a list - if so, get its child elements
"""
root = ET.fromstring(xml_content) root = ET.fromstring(xml_content)
params = {} params = {}
for child in root.findall(".//parameters/*"):
try: if json_schema is not None: # check if we have a json schema for this function call
# Attempt to decode the element's text as JSON # iterate over all properties in the schema
params[child.tag] = json.loads(child.text) for prop in json_schema["properties"]:
except json.JSONDecodeError: # If property is an array, get the nested items
# If JSON decoding fails, use the original text _element = root.find(f"parameters/{prop}")
params[child.tag] = child.text if json_schema["properties"][prop]["type"] == "array":
items = []
if _element is not None:
for value in _element:
try:
if value.text is not None:
_value = json.loads(value.text)
else:
continue
except json.JSONDecodeError:
_value = value.text
items.append(_value)
params[prop] = items
# If property is not an array, append the value directly
elif _element is not None and _element.text is not None:
try:
_value = json.loads(_element.text)
except json.JSONDecodeError:
_value = _element.text
params[prop] = _value
else:
for child in root.findall(".//parameters/*"):
if child is not None and child.text is not None:
try:
# Attempt to decode the element's text as JSON
params[child.tag] = json.loads(child.text) # type: ignore
except json.JSONDecodeError:
# If JSON decoding fails, use the original text
params[child.tag] = child.text # type: ignore
return params return params
### ###

@ -2952,7 +2952,26 @@ async def atext_completion(*args, **kwargs):
model=model, model=model,
) )
else: else:
return response transformed_logprobs = None
# only supported for TGI models
try:
raw_response = response._hidden_params.get("original_response", None)
transformed_logprobs = litellm.utils.transform_logprobs(raw_response)
except Exception as e:
print_verbose(f"LiteLLM non blocking exception: {e}")
text_completion_response = TextCompletionResponse()
text_completion_response["id"] = response.get("id", None)
text_completion_response["object"] = "text_completion"
text_completion_response["created"] = response.get("created", None)
text_completion_response["model"] = response.get("model", None)
text_choices = TextChoices()
text_choices["text"] = response["choices"][0]["message"]["content"]
text_choices["index"] = response["choices"][0]["index"]
text_choices["logprobs"] = transformed_logprobs
text_choices["finish_reason"] = response["choices"][0]["finish_reason"]
text_completion_response["choices"] = [text_choices]
text_completion_response["usage"] = response.get("usage", None)
return text_completion_response
except Exception as e: except Exception as e:
custom_llm_provider = custom_llm_provider or "openai" custom_llm_provider = custom_llm_provider or "openai"
raise exception_type( raise exception_type(
@ -3165,6 +3184,7 @@ def text_completion(
transformed_logprobs = litellm.utils.transform_logprobs(raw_response) transformed_logprobs = litellm.utils.transform_logprobs(raw_response)
except Exception as e: except Exception as e:
print_verbose(f"LiteLLM non blocking exception: {e}") print_verbose(f"LiteLLM non blocking exception: {e}")
text_completion_response["id"] = response.get("id", None) text_completion_response["id"] = response.get("id", None)
text_completion_response["object"] = "text_completion" text_completion_response["object"] = "text_completion"
text_completion_response["created"] = response.get("created", None) text_completion_response["created"] = response.get("created", None)
@ -3176,6 +3196,7 @@ def text_completion(
text_choices["finish_reason"] = response["choices"][0]["finish_reason"] text_choices["finish_reason"] = response["choices"][0]["finish_reason"]
text_completion_response["choices"] = [text_choices] text_completion_response["choices"] = [text_choices]
text_completion_response["usage"] = response.get("usage", None) text_completion_response["usage"] = response.get("usage", None)
return text_completion_response return text_completion_response
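Roughly, the async path now mirrors the sync `text_completion` wrapping shown below it; a hedged usage sketch (assumes `OPENAI_API_KEY` is set):

```python
import asyncio
import litellm

async def main():
    # Chat model routed through the text-completion interface
    resp = await litellm.atext_completion(
        model="gpt-3.5-turbo",
        prompt="Say hello in one word",
        max_tokens=5,
    )
    # With this change the chat-style result is re-wrapped as a TextCompletionResponse,
    # so the OpenAI text-completion fields are available:
    print(resp["choices"][0]["text"], resp["usage"])

asyncio.run(main())
```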

@ -1503,7 +1503,7 @@
"litellm_provider": "bedrock", "litellm_provider": "bedrock",
"mode": "chat" "mode": "chat"
}, },
"mistral.mixtral-8x7b-instruct": { "mistral.mixtral-8x7b-instruct-v0:1": {
"max_tokens": 8191, "max_tokens": 8191,
"max_input_tokens": 32000, "max_input_tokens": 32000,
"max_output_tokens": 8191, "max_output_tokens": 8191,
@ -1512,7 +1512,7 @@
"litellm_provider": "bedrock", "litellm_provider": "bedrock",
"mode": "chat" "mode": "chat"
}, },
"bedrock/us-west-2/mistral.mixtral-8x7b-instruct": { "bedrock/us-west-2/mistral.mixtral-8x7b-instruct-v0:1": {
"max_tokens": 8191, "max_tokens": 8191,
"max_input_tokens": 32000, "max_input_tokens": 32000,
"max_output_tokens": 8191, "max_output_tokens": 8191,

@ -0,0 +1 @@
self.__BUILD_MANIFEST={__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/_error":["static/chunks/pages/_error-d6107f1aac0c574c.js"],sortedPages:["/_app","/_error"]},self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();

@ -0,0 +1 @@
self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()

@ -1 +0,0 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d1ad37b1875df240.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a507ee9e75a3be72.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-589b47e7a69d316f.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d1ad37b1875df240.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f8da5a6a5b29d249.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[90177,[\"798\",\"static/chunks/798-4baed68da0c5497d.js\",\"931\",\"static/chunks/app/page-37392d6753f8a3d0.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f8da5a6a5b29d249.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"L9N6TOWJaqSp22Vj96YE4\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

@ -1,7 +0,0 @@
2:I[77831,[],""]
3:I[90177,["798","static/chunks/798-4baed68da0c5497d.js","931","static/chunks/app/page-37392d6753f8a3d0.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["L9N6TOWJaqSp22Vj96YE4",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f8da5a6a5b29d249.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

@ -5,10 +5,15 @@ model_list:
api_key: my-fake-key api_key: my-fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/ api_base: https://exampleopenaiendpoint-production.up.railway.app/
litellm_settings:
max_budget: 600020
budget_duration: 30d
general_settings: general_settings:
master_key: sk-1234 master_key: sk-1234
proxy_batch_write_at: 5 # 👈 Frequency of batch writing logs to server (in seconds) proxy_batch_write_at: 60 # 👈 Frequency of batch writing logs to server (in seconds)
enable_jwt_auth: True enable_jwt_auth: True
alerting: ["slack"]
litellm_jwtauth: litellm_jwtauth:
admin_jwt_scope: "litellm_proxy_admin" admin_jwt_scope: "litellm_proxy_admin"
team_jwt_scope: "litellm_team" team_jwt_scope: "litellm_team"

@ -18,6 +18,7 @@ from litellm.proxy._types import (
from typing import Optional, Literal, Union from typing import Optional, Literal, Union
from litellm.proxy.utils import PrismaClient from litellm.proxy.utils import PrismaClient
from litellm.caching import DualCache from litellm.caching import DualCache
import litellm
all_routes = LiteLLMRoutes.openai_routes.value + LiteLLMRoutes.management_routes.value all_routes = LiteLLMRoutes.openai_routes.value + LiteLLMRoutes.management_routes.value
@ -26,6 +27,7 @@ def common_checks(
request_body: dict, request_body: dict,
team_object: LiteLLM_TeamTable, team_object: LiteLLM_TeamTable,
end_user_object: Optional[LiteLLM_EndUserTable], end_user_object: Optional[LiteLLM_EndUserTable],
global_proxy_spend: Optional[float],
general_settings: dict, general_settings: dict,
route: str, route: str,
) -> bool: ) -> bool:
@ -37,6 +39,7 @@ def common_checks(
3. If team is in budget 3. If team is in budget
4. If end_user ('user' passed to /chat/completions, /embeddings endpoint) is in budget 4. If end_user ('user' passed to /chat/completions, /embeddings endpoint) is in budget
5. [OPTIONAL] If 'enforce_end_user' enabled - did developer pass in 'user' param for openai endpoints 5. [OPTIONAL] If 'enforce_end_user' enabled - did developer pass in 'user' param for openai endpoints
6. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
""" """
_model = request_body.get("model", None) _model = request_body.get("model", None)
if team_object.blocked == True: if team_object.blocked == True:
@ -66,7 +69,7 @@ def common_checks(
end_user_budget = end_user_object.litellm_budget_table.max_budget end_user_budget = end_user_object.litellm_budget_table.max_budget
if end_user_budget is not None and end_user_object.spend > end_user_budget: if end_user_budget is not None and end_user_object.spend > end_user_budget:
raise Exception( raise Exception(
f"End User={end_user_object.user_id} over budget. Spend={end_user_object.spend}, Budget={end_user_budget}" f"ExceededBudget: End User={end_user_object.user_id} over budget. Spend={end_user_object.spend}, Budget={end_user_budget}"
) )
# 5. [OPTIONAL] If 'enforce_user_param' enabled - did developer pass in 'user' param for openai endpoints # 5. [OPTIONAL] If 'enforce_user_param' enabled - did developer pass in 'user' param for openai endpoints
if ( if (
@ -77,7 +80,12 @@ def common_checks(
raise Exception( raise Exception(
f"'user' param not passed in. 'enforce_user_param'={general_settings['enforce_user_param']}" f"'user' param not passed in. 'enforce_user_param'={general_settings['enforce_user_param']}"
) )
# 6. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
if litellm.max_budget > 0 and global_proxy_spend is not None:
if global_proxy_spend > litellm.max_budget:
raise Exception(
f"ExceededBudget: LiteLLM Proxy has exceeded its budget. Current spend: {global_proxy_spend}; Max Budget: {litellm.max_budget}"
)
return True return True

@ -114,7 +114,8 @@ class JWTHandler:
public_key: Optional[dict] = None public_key: Optional[dict] = None
if len(keys) == 1: if len(keys) == 1:
public_key = keys[0] if kid is None or keys["kid"] == kid:
public_key = keys[0]
elif len(keys) > 1: elif len(keys) > 1:
for key in keys: for key in keys:
if kid is not None and key["kid"] == kid: if kid is not None and key["kid"] == kid:

@ -437,12 +437,49 @@ async def user_api_key_auth(
key=end_user_id, value=end_user_object key=end_user_id, value=end_user_object
) )
global_proxy_spend = None
if litellm.max_budget > 0: # user set proxy max budget
# check cache
global_proxy_spend = await user_api_key_cache.async_get_cache(
key="{}:spend".format(litellm_proxy_admin_name)
)
if global_proxy_spend is None and prisma_client is not None:
# get from db
sql_query = """SELECT SUM(spend) as total_spend FROM "MonthlyGlobalSpend";"""
response = await prisma_client.db.query_raw(query=sql_query)
global_proxy_spend = response[0]["total_spend"]
await user_api_key_cache.async_set_cache(
key="{}:spend".format(litellm_proxy_admin_name),
value=global_proxy_spend,
ttl=60,
)
if global_proxy_spend is not None:
user_info = {
"user_id": litellm_proxy_admin_name,
"max_budget": litellm.max_budget,
"spend": global_proxy_spend,
"user_email": "",
}
asyncio.create_task(
proxy_logging_obj.budget_alerts(
user_max_budget=litellm.max_budget,
user_current_spend=global_proxy_spend,
type="user_and_proxy_budget",
user_info=user_info,
)
)
# run through common checks # run through common checks
_ = common_checks( _ = common_checks(
request_body=request_data, request_body=request_data,
team_object=team_object, team_object=team_object,
end_user_object=end_user_object, end_user_object=end_user_object,
general_settings=general_settings, general_settings=general_settings,
global_proxy_spend=global_proxy_spend,
route=route, route=route,
) )
# save user object in cache # save user object in cache
@ -656,17 +693,8 @@ async def user_api_key_auth(
) )
# Check 2. If user_id for this token is in budget # Check 2. If user_id for this token is in budget
## Check 2.1 If global proxy is in budget
## Check 2.2 [OPTIONAL - checked only if litellm.max_user_budget is not None] If 'user' passed in /chat/completions is in budget
if valid_token.user_id is not None: if valid_token.user_id is not None:
user_id_list = [valid_token.user_id, litellm_proxy_budget_name] user_id_list = [valid_token.user_id]
if (
litellm.max_user_budget is not None
): # Check if 'user' passed in /chat/completions is in budget, only checked if litellm.max_user_budget is set
user_passed_to_chat_completions = request_data.get("user", None)
if user_passed_to_chat_completions is not None:
user_id_list.append(user_passed_to_chat_completions)
for id in user_id_list: for id in user_id_list:
value = user_api_key_cache.get_cache(key=id) value = user_api_key_cache.get_cache(key=id)
if value is not None: if value is not None:
@ -675,13 +703,12 @@ async def user_api_key_auth(
user_id_information.append(value) user_id_information.append(value)
if user_id_information is None or ( if user_id_information is None or (
isinstance(user_id_information, list) isinstance(user_id_information, list)
and len(user_id_information) < 2 and len(user_id_information) < 1
): ):
if prisma_client is not None: if prisma_client is not None:
user_id_information = await prisma_client.get_data( user_id_information = await prisma_client.get_data(
user_id_list=[ user_id_list=[
valid_token.user_id, valid_token.user_id,
litellm_proxy_budget_name,
], ],
table_name="user", table_name="user",
query_type="find_all", query_type="find_all",
@ -881,11 +908,54 @@ async def user_api_key_auth(
blocked=valid_token.team_blocked, blocked=valid_token.team_blocked,
models=valid_token.team_models, models=valid_token.team_models,
) )
_end_user_object = None
if "user" in request_data:
_id = "end_user_id:{}".format(request_data["user"])
_end_user_object = await user_api_key_cache.async_get_cache(key=_id)
if _end_user_object is not None:
_end_user_object = LiteLLM_EndUserTable(**_end_user_object)
global_proxy_spend = None
if litellm.max_budget > 0: # user set proxy max budget
# check cache
global_proxy_spend = await user_api_key_cache.async_get_cache(
key="{}:spend".format(litellm_proxy_admin_name)
)
if global_proxy_spend is None:
# get from db
sql_query = """SELECT SUM(spend) as total_spend FROM "MonthlyGlobalSpend";"""
response = await prisma_client.db.query_raw(query=sql_query)
global_proxy_spend = response[0]["total_spend"]
await user_api_key_cache.async_set_cache(
key="{}:spend".format(litellm_proxy_admin_name),
value=global_proxy_spend,
ttl=60,
)
if global_proxy_spend is not None:
user_info = {
"user_id": litellm_proxy_admin_name,
"max_budget": litellm.max_budget,
"spend": global_proxy_spend,
"user_email": "",
}
asyncio.create_task(
proxy_logging_obj.budget_alerts(
user_max_budget=litellm.max_budget,
user_current_spend=global_proxy_spend,
type="user_and_proxy_budget",
user_info=user_info,
)
)
_ = common_checks( _ = common_checks(
request_body=request_data, request_body=request_data,
team_object=_team_obj, team_object=_team_obj,
end_user_object=None, end_user_object=_end_user_object,
general_settings=general_settings, general_settings=general_settings,
global_proxy_spend=global_proxy_spend,
route=route, route=route,
) )
# Token passed all checks # Token passed all checks
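To make the new end-user lookup concrete, a hedged example of a request whose `user` field now resolves to an `end_user_id:<id>` cache entry and flows into `common_checks` (proxy URL and master key taken from the example config in this commit):

```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:8000")

client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    user="end-user-42",  # looked up as "end_user_id:end-user-42" and checked against its budget
)
```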
@ -1553,7 +1623,7 @@ async def update_cache(
async def _update_user_cache(): async def _update_user_cache():
## UPDATE CACHE FOR USER ID + GLOBAL PROXY ## UPDATE CACHE FOR USER ID + GLOBAL PROXY
user_ids = [user_id, litellm_proxy_budget_name, end_user_id] user_ids = [user_id]
try: try:
for _id in user_ids: for _id in user_ids:
# Fetch the existing cost for the given user # Fetch the existing cost for the given user
@ -1594,14 +1664,26 @@ async def update_cache(
user_api_key_cache.set_cache( user_api_key_cache.set_cache(
key=_id, value=existing_spend_obj.json() key=_id, value=existing_spend_obj.json()
) )
## UPDATE GLOBAL PROXY ##
global_proxy_spend = await user_api_key_cache.async_get_cache(
key="{}:spend".format(litellm_proxy_admin_name)
)
if global_proxy_spend is None:
await user_api_key_cache.async_set_cache(
key="{}:spend".format(litellm_proxy_admin_name), value=response_cost
)
elif response_cost is not None and global_proxy_spend is not None:
increment = global_proxy_spend + response_cost
await user_api_key_cache.async_set_cache(
key="{}:spend".format(litellm_proxy_admin_name), value=increment
)
except Exception as e: except Exception as e:
verbose_proxy_logger.debug( verbose_proxy_logger.debug(
f"An error occurred updating user cache: {str(e)}\n\n{traceback.format_exc()}" f"An error occurred updating user cache: {str(e)}\n\n{traceback.format_exc()}"
) )
async def _update_end_user_cache(): async def _update_end_user_cache():
## UPDATE CACHE FOR USER ID + GLOBAL PROXY _id = "end_user_id:{}".format(end_user_id)
_id = end_user_id
try: try:
# Fetch the existing cost for the given user # Fetch the existing cost for the given user
existing_spend_obj = await user_api_key_cache.async_get_cache(key=_id) existing_spend_obj = await user_api_key_cache.async_get_cache(key=_id)
@ -1609,14 +1691,14 @@ async def update_cache(
# if user does not exist in LiteLLM_UserTable, create a new user # if user does not exist in LiteLLM_UserTable, create a new user
existing_spend = 0 existing_spend = 0
max_user_budget = None max_user_budget = None
if litellm.max_user_budget is not None: if litellm.max_end_user_budget is not None:
max_user_budget = litellm.max_user_budget max_end_user_budget = litellm.max_end_user_budget
existing_spend_obj = LiteLLM_EndUserTable( existing_spend_obj = LiteLLM_EndUserTable(
user_id=_id, user_id=_id,
spend=0, spend=0,
blocked=False, blocked=False,
litellm_budget_table=LiteLLM_BudgetTable( litellm_budget_table=LiteLLM_BudgetTable(
max_budget=max_user_budget max_budget=max_end_user_budget
), ),
) )
verbose_proxy_logger.debug( verbose_proxy_logger.debug(
@ -2909,6 +2991,11 @@ def model_list(
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
tags=["completions"], tags=["completions"],
) )
@router.post(
"/openai/deployments/{model:path}/completions",
dependencies=[Depends(user_api_key_auth)],
tags=["completions"],
)
async def completion( async def completion(
request: Request, request: Request,
fastapi_response: Response, fastapi_response: Response,
@ -4049,7 +4136,6 @@ async def generate_key_fn(
) )
_budget_id = getattr(_budget, "budget_id", None) _budget_id = getattr(_budget, "budget_id", None)
data_json = data.json() # type: ignore data_json = data.json() # type: ignore
# if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users # if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users
if "max_budget" in data_json: if "max_budget" in data_json:
data_json["key_max_budget"] = data_json.pop("max_budget", None) data_json["key_max_budget"] = data_json.pop("max_budget", None)
@ -4108,6 +4194,13 @@ async def update_key_fn(request: Request, data: UpdateKeyRequest):
0, 0,
): # models default to [], spend defaults to 0, we should not reset these values ): # models default to [], spend defaults to 0, we should not reset these values
non_default_values[k] = v non_default_values[k] = v
if "duration" in non_default_values:
duration = non_default_values.pop("duration")
duration_s = _duration_in_seconds(duration=duration)
expires = datetime.utcnow() + timedelta(seconds=duration_s)
non_default_values["expires"] = expires
response = await prisma_client.update_data( response = await prisma_client.update_data(
token=key, data={**non_default_values, "token": key} token=key, data={**non_default_values, "token": key}
) )
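A hedged sketch of exercising the new `duration` handling on key updates; the `/key/update` route is an assumption about where `update_key_fn` is mounted, and `_duration_in_seconds` is expected to parse strings like `"30d"`:

```python
import requests

resp = requests.post(
    "http://0.0.0.0:8000/key/update",  # assumed route for update_key_fn
    headers={"Authorization": "Bearer sk-1234"},  # master key from the example proxy config
    json={
        "key": "sk-...",    # placeholder for an existing virtual key
        "duration": "30d",  # now converted to an absolute `expires` timestamp
    },
)
print(resp.status_code, resp.json())
```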
@ -6051,7 +6144,7 @@ async def team_member_delete(
-D '{ -D '{
"team_id": "45e3e396-ee08-4a61-a88e-16b3ce7e0849", "team_id": "45e3e396-ee08-4a61-a88e-16b3ce7e0849",
"member": {"role": "user", "user_id": "krrish247652@berri.ai"} "user_id": "krrish247652@berri.ai"
}' }'
``` ```
""" """

@ -1941,9 +1941,9 @@ async def update_spend(
end_user_id, end_user_id,
response_cost, response_cost,
) in prisma_client.end_user_list_transactons.items(): ) in prisma_client.end_user_list_transactons.items():
max_user_budget = None max_end_user_budget = None
if litellm.max_user_budget is not None: if litellm.max_end_user_budget is not None:
max_user_budget = litellm.max_user_budget max_end_user_budget = litellm.max_end_user_budget
new_user_obj = LiteLLM_EndUserTable( new_user_obj = LiteLLM_EndUserTable(
user_id=end_user_id, spend=response_cost, blocked=False user_id=end_user_id, spend=response_cost, blocked=False
) )

@ -195,6 +195,48 @@ def test_completion_claude_3_function_call():
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
def test_parse_xml_params():
from litellm.llms.prompt_templates.factory import parse_xml_params
## SCENARIO 1 ## - W/ ARRAY
xml_content = """<invoke><tool_name>return_list_of_str</tool_name>\n<parameters>\n<value>\n<item>apple</item>\n<item>banana</item>\n<item>orange</item>\n</value>\n</parameters></invoke>"""
json_schema = {
"properties": {
"value": {
"items": {"type": "string"},
"title": "Value",
"type": "array",
}
},
"required": ["value"],
"type": "object",
}
response = parse_xml_params(xml_content=xml_content, json_schema=json_schema)
print(f"response: {response}")
assert response["value"] == ["apple", "banana", "orange"]
## SCENARIO 2 ## - W/OUT ARRAY
xml_content = """<invoke><tool_name>get_current_weather</tool_name>\n<parameters>\n<location>Boston, MA</location>\n<unit>fahrenheit</unit>\n</parameters></invoke>"""
json_schema = {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
}
response = parse_xml_params(xml_content=xml_content, json_schema=json_schema)
print(f"response: {response}")
assert response["location"] == "Boston, MA"
assert response["unit"] == "fahrenheit"
def test_completion_claude_3_multi_turn_conversations(): def test_completion_claude_3_multi_turn_conversations():
litellm.set_verbose = True litellm.set_verbose = True
litellm.modify_params = True litellm.modify_params = True

@ -324,7 +324,7 @@ def test_call_with_end_user_over_budget(prisma_client):
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
setattr(litellm, "max_user_budget", 0.00001) setattr(litellm, "max_end_user_budget", 0.00001)
try: try:
async def test(): async def test():
@ -378,7 +378,9 @@ def test_call_with_end_user_over_budget(prisma_client):
"user_api_key_user_id": user, "user_api_key_user_id": user,
}, },
"proxy_server_request": { "proxy_server_request": {
"user": user, "body": {
"user": user,
}
}, },
}, },
"response_cost": 10, "response_cost": 10,
@ -407,18 +409,20 @@ def test_call_with_proxy_over_budget(prisma_client):
litellm_proxy_budget_name = f"litellm-proxy-budget-{time.time()}" litellm_proxy_budget_name = f"litellm-proxy-budget-{time.time()}"
setattr( setattr(
litellm.proxy.proxy_server, litellm.proxy.proxy_server,
"litellm_proxy_budget_name", "litellm_proxy_admin_name",
litellm_proxy_budget_name, litellm_proxy_budget_name,
) )
setattr(litellm, "max_budget", 0.00001)
from litellm.proxy.proxy_server import user_api_key_cache
user_api_key_cache.set_cache(
key="{}:spend".format(litellm_proxy_budget_name), value=0
)
setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
try: try:
async def test(): async def test():
await litellm.proxy.proxy_server.prisma_client.connect() await litellm.proxy.proxy_server.prisma_client.connect()
## CREATE PROXY + USER BUDGET ##
request = NewUserRequest(
max_budget=0.00001, user_id=litellm_proxy_budget_name
)
await new_user(request)
request = NewUserRequest() request = NewUserRequest()
key = await new_user(request) key = await new_user(request)
print(key) print(key)
@ -470,6 +474,7 @@ def test_call_with_proxy_over_budget(prisma_client):
start_time=datetime.now(), start_time=datetime.now(),
end_time=datetime.now(), end_time=datetime.now(),
) )
await asyncio.sleep(5) await asyncio.sleep(5)
# use generated key to auth in # use generated key to auth in
result = await user_api_key_auth(request=request, api_key=bearer_token) result = await user_api_key_auth(request=request, api_key=bearer_token)
@ -571,9 +576,17 @@ def test_call_with_proxy_over_budget_stream(prisma_client):
litellm_proxy_budget_name = f"litellm-proxy-budget-{time.time()}" litellm_proxy_budget_name = f"litellm-proxy-budget-{time.time()}"
setattr( setattr(
litellm.proxy.proxy_server, litellm.proxy.proxy_server,
"litellm_proxy_budget_name", "litellm_proxy_admin_name",
litellm_proxy_budget_name, litellm_proxy_budget_name,
) )
setattr(litellm, "max_budget", 0.00001)
from litellm.proxy.proxy_server import user_api_key_cache
user_api_key_cache.set_cache(
key="{}:spend".format(litellm_proxy_budget_name), value=0
)
setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
import logging import logging
@ -584,10 +597,10 @@ def test_call_with_proxy_over_budget_stream(prisma_client):
async def test(): async def test():
await litellm.proxy.proxy_server.prisma_client.connect() await litellm.proxy.proxy_server.prisma_client.connect()
## CREATE PROXY + USER BUDGET ## ## CREATE PROXY + USER BUDGET ##
request = NewUserRequest( # request = NewUserRequest(
max_budget=0.00001, user_id=litellm_proxy_budget_name # max_budget=0.00001, user_id=litellm_proxy_budget_name
) # )
await new_user(request) # await new_user(request)
request = NewUserRequest() request = NewUserRequest()
key = await new_user(request) key = await new_user(request)
print(key) print(key)

@ -1,76 +0,0 @@
# #### What this tests ####
# # This tests if logging to the llmonitor integration actually works
# # Adds the parent directory to the system path
# import sys
# import os
# sys.path.insert(0, os.path.abspath("../.."))
# from litellm import completion, embedding
# import litellm
# litellm.success_callback = ["llmonitor"]
# litellm.failure_callback = ["llmonitor"]
# litellm.set_verbose = True
# def test_chat_openai():
# try:
# response = completion(
# model="gpt-3.5-turbo",
# messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
# user="ishaan_from_litellm"
# )
# print(response)
# except Exception as e:
# print(e)
# def test_embedding_openai():
# try:
# response = embedding(model="text-embedding-ada-002", input=["test"])
# # Add any assertions here to check the response
# print(f"response: {str(response)[:50]}")
# except Exception as e:
# print(e)
# test_chat_openai()
# # test_embedding_openai()
# def test_llmonitor_logging_function_calling():
# function1 = [
# {
# "name": "get_current_weather",
# "description": "Get the current weather in a given location",
# "parameters": {
# "type": "object",
# "properties": {
# "location": {
# "type": "string",
# "description": "The city and state, e.g. San Francisco, CA",
# },
# "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
# },
# "required": ["location"],
# },
# }
# ]
# try:
# response = completion(model="gpt-3.5-turbo",
# messages=[{
# "role": "user",
# "content": "what's the weather in boston"
# }],
# temperature=0.1,
# functions=function1,
# )
# print(response)
# except Exception as e:
# print(e)
# # test_llmonitor_logging_function_calling()

@ -0,0 +1,85 @@
import sys
import os
import io
sys.path.insert(0, os.path.abspath("../.."))
from litellm import completion
import litellm
litellm.failure_callback = ["lunary"]
litellm.success_callback = ["lunary"]
litellm.set_verbose = True
def test_lunary_logging():
try:
response = completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "what llm are u"}],
max_tokens=10,
temperature=0.2,
user="test-user",
)
print(response)
except Exception as e:
print(e)
# test_lunary_logging()
def test_lunary_template():
import lunary
try:
template = lunary.render_template("test-template", {"question": "Hello!"})
response = completion(**template)
print(response)
except Exception as e:
print(e)
# test_lunary_template()
def test_lunary_logging_with_metadata():
try:
response = completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "what llm are u"}],
max_tokens=10,
temperature=0.2,
metadata={
"run_name": "litellmRUN",
"project_name": "litellm-completion",
},
)
print(response)
except Exception as e:
print(e)
# test_lunary_logging_with_metadata()
def test_lunary_logging_with_streaming_and_metadata():
try:
response = completion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "what llm are u"}],
max_tokens=10,
temperature=0.2,
metadata={
"run_name": "litellmRUN",
"project_name": "litellm-completion",
},
stream=True,
)
for chunk in response:
continue
except Exception as e:
print(e)
# test_lunary_logging_with_streaming_and_metadata()

@ -59,7 +59,7 @@ from .integrations.helicone import HeliconeLogger
from .integrations.aispend import AISpendLogger from .integrations.aispend import AISpendLogger
from .integrations.berrispend import BerriSpendLogger from .integrations.berrispend import BerriSpendLogger
from .integrations.supabase import Supabase from .integrations.supabase import Supabase
from .integrations.llmonitor import LLMonitorLogger from .integrations.lunary import LunaryLogger
from .integrations.prompt_layer import PromptLayerLogger from .integrations.prompt_layer import PromptLayerLogger
from .integrations.langsmith import LangsmithLogger from .integrations.langsmith import LangsmithLogger
from .integrations.weights_biases import WeightsBiasesLogger from .integrations.weights_biases import WeightsBiasesLogger
@ -129,7 +129,7 @@ dynamoLogger = None
s3Logger = None s3Logger = None
genericAPILogger = None genericAPILogger = None
clickHouseLogger = None clickHouseLogger = None
llmonitorLogger = None lunaryLogger = None
aispendLogger = None aispendLogger = None
berrispendLogger = None berrispendLogger = None
supabaseClient = None supabaseClient = None
@ -882,7 +882,7 @@ class CallTypes(Enum):
# Logging function -> log the exact model details + what's being sent | Non-BlockingP # Logging function -> log the exact model details + what's being sent | Non-BlockingP
class Logging: class Logging:
global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, capture_exception, add_breadcrumb, llmonitorLogger global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, capture_exception, add_breadcrumb, lunaryLogger
def __init__( def __init__(
self, self,
@ -1429,27 +1429,37 @@ class Logging:
end_time=end_time, end_time=end_time,
print_verbose=print_verbose, print_verbose=print_verbose,
) )
if callback == "llmonitor": if callback == "lunary":
print_verbose("reaches llmonitor for logging!") print_verbose("reaches lunary for logging!")
model = self.model model = self.model
kwargs = self.model_call_details
input = self.model_call_details.get( input = kwargs.get(
"messages", self.model_call_details.get("input", None) "messages", kwargs.get("input", None)
) )
# if contains input, it's 'embedding', otherwise 'llm'
type = ( type = (
"embed" "embed"
if self.call_type == CallTypes.embedding.value if self.call_type == CallTypes.embedding.value
else "llm" else "llm"
) )
llmonitorLogger.log_event( # this only logs streaming once, complete_streaming_response exists i.e when stream ends
if self.stream:
if "complete_streaming_response" not in kwargs:
break
else:
result = kwargs["complete_streaming_response"]
lunaryLogger.log_event(
type=type, type=type,
kwargs=kwargs,
event="end", event="end",
model=model, model=model,
input=input, input=input,
user_id=self.model_call_details.get("user", "default"), user_id=kwargs.get("user", None),
#user_props=self.model_call_details.get("user_props", None),
extra=kwargs.get("optional_params", {}),
response_obj=result, response_obj=result,
start_time=start_time, start_time=start_time,
end_time=end_time, end_time=end_time,
@ -2041,8 +2051,8 @@ class Logging:
call_type=self.call_type, call_type=self.call_type,
stream=self.stream, stream=self.stream,
) )
elif callback == "llmonitor": elif callback == "lunary":
print_verbose("reaches llmonitor for logging error!") print_verbose("reaches lunary for logging error!")
model = self.model model = self.model
@ -2054,7 +2064,9 @@ class Logging:
else "llm" else "llm"
) )
llmonitorLogger.log_event(
lunaryLogger.log_event(
type=_type, type=_type,
event="error", event="error",
user_id=self.model_call_details.get("user", "default"), user_id=self.model_call_details.get("user", "default"),
@ -6166,7 +6178,9 @@ def validate_environment(model: Optional[str] = None) -> dict:
def set_callbacks(callback_list, function_id=None): def set_callbacks(callback_list, function_id=None):
global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, llmonitorLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger
global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger
try: try:
for callback in callback_list: for callback in callback_list:
print_verbose(f"callback: {callback}") print_verbose(f"callback: {callback}")
@@ -6226,8 +6240,8 @@ def set_callbacks(callback_list, function_id=None):
print_verbose("Initialized Athina Logger") print_verbose("Initialized Athina Logger")
elif callback == "helicone": elif callback == "helicone":
heliconeLogger = HeliconeLogger() heliconeLogger = HeliconeLogger()
elif callback == "llmonitor": elif callback == "lunary":
llmonitorLogger = LLMonitorLogger() lunaryLogger = LunaryLogger()
elif callback == "promptlayer": elif callback == "promptlayer":
promptLayerLogger = PromptLayerLogger() promptLayerLogger = PromptLayerLogger()
elif callback == "langfuse": elif callback == "langfuse":
@@ -6270,7 +6284,7 @@ def set_callbacks(callback_list, function_id=None):
# NOTE: DEPRECATING this in favor of using failure_handler() in Logging: # NOTE: DEPRECATING this in favor of using failure_handler() in Logging:
def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs): def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs):
global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, llmonitorLogger global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger
try: try:
# print_verbose(f"handle_failure args: {args}") # print_verbose(f"handle_failure args: {args}")
# print_verbose(f"handle_failure kwargs: {kwargs}") # print_verbose(f"handle_failure kwargs: {kwargs}")

View file

@@ -1503,7 +1503,7 @@
"litellm_provider": "bedrock", "litellm_provider": "bedrock",
"mode": "chat" "mode": "chat"
}, },
"mistral.mixtral-8x7b-instruct": { "mistral.mixtral-8x7b-instruct-v0:1": {
"max_tokens": 8191, "max_tokens": 8191,
"max_input_tokens": 32000, "max_input_tokens": 32000,
"max_output_tokens": 8191, "max_output_tokens": 8191,
@@ -1512,7 +1512,7 @@
"litellm_provider": "bedrock", "litellm_provider": "bedrock",
"mode": "chat" "mode": "chat"
}, },
"bedrock/us-west-2/mistral.mixtral-8x7b-instruct": { "bedrock/us-west-2/mistral.mixtral-8x7b-instruct-v0:1": {
"max_tokens": 8191, "max_tokens": 8191,
"max_input_tokens": 32000, "max_input_tokens": 32000,
"max_output_tokens": 8191, "max_output_tokens": 8191,

View file

@@ -45,8 +45,8 @@ model_list:
litellm_settings: litellm_settings:
drop_params: True drop_params: True
max_budget: 100 # max_budget: 100
budget_duration: 30d # budget_duration: 30d
num_retries: 5 num_retries: 5
request_timeout: 600 request_timeout: 600
telemetry: False telemetry: False

View file

@@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "litellm" name = "litellm"
version = "1.34.14" version = "1.34.17"
description = "Library to easily interface with LLM API providers" description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"] authors = ["BerriAI"]
license = "MIT" license = "MIT"
@@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api" build-backend = "poetry.core.masonry.api"
[tool.commitizen] [tool.commitizen]
version = "1.34.14" version = "1.34.17"
version_files = [ version_files = [
"pyproject.toml:^version" "pyproject.toml:^version"
] ]

View file

@@ -65,7 +65,7 @@ async def update_key(session, get_key):
"Authorization": f"Bearer sk-1234", "Authorization": f"Bearer sk-1234",
"Content-Type": "application/json", "Content-Type": "application/json",
} }
data = {"key": get_key, "models": ["gpt-4"]} data = {"key": get_key, "models": ["gpt-4"], "duration": "120s"}
async with session.post(url, headers=headers, json=data) as response: async with session.post(url, headers=headers, json=data) as response:
status = response.status status = response.status
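
The key-update test now also re-scopes the key's lifetime via `duration`. A standalone version of the request, for reference; the proxy URL, master key, and `/key/update` path mirror the surrounding test fixture and should be treated as assumptions outside of it:

```python
# Standalone sketch of the updated payload; adjust the URL and master key
# for a real deployment. The new "duration" field limits the key to 120s.
import aiohttp

async def update_key(key: str) -> dict:
    url = "http://0.0.0.0:4000/key/update"
    headers = {
        "Authorization": "Bearer sk-1234",
        "Content-Type": "application/json",
    }
    data = {"key": key, "models": ["gpt-4"], "duration": "120s"}
    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, json=data) as response:
            assert response.status == 200, await response.text()
            return await response.json()
```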

View file

@@ -2,7 +2,8 @@
## Tests /chat/completions by generating a key and then making a chat completions request ## Tests /chat/completions by generating a key and then making a chat completions request
import pytest import pytest
import asyncio import asyncio
import aiohttp import aiohttp, openai
from openai import OpenAI
async def generate_key(session): async def generate_key(session):
@@ -114,14 +115,14 @@ async def completion(session, key):
async with session.post(url, headers=headers, json=data) as response: async with session.post(url, headers=headers, json=data) as response:
status = response.status status = response.status
response_text = await response.text()
print(response_text)
print()
if status != 200: if status != 200:
raise Exception(f"Request did not return a 200 status code: {status}") raise Exception(f"Request did not return a 200 status code: {status}")
response = await response.json()
return response
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_completion(): async def test_completion():
@@ -137,7 +138,17 @@ async def test_completion():
await completion(session=session, key=key) await completion(session=session, key=key)
key_gen = await new_user(session=session) key_gen = await new_user(session=session)
key_2 = key_gen["key"] key_2 = key_gen["key"]
await completion(session=session, key=key_2) # response = await completion(session=session, key=key_2)
## validate openai format ##
client = OpenAI(api_key=key_2, base_url="http://0.0.0.0:4000")
client.completions.create(
model="gpt-4",
prompt="Say this is a test",
max_tokens=7,
temperature=0,
)
async def embeddings(session, key): async def embeddings(session, key):
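
The second generated key is now exercised through the `openai` SDK instead of a raw `aiohttp` call, which doubles as a check that the proxy stays OpenAI-compatible. A hedged companion sketch that runs the same check against the chat endpoint; the base URL mirrors the test, and `key_2` is assumed to come from the test's `new_user()` helper:

```python
# Companion sketch (not part of this diff): OpenAI-compatibility check via
# the chat completions endpoint of a locally running proxy on port 4000.
from openai import OpenAI

def check_openai_compat(key_2: str) -> None:
    client = OpenAI(api_key=key_2, base_url="http://0.0.0.0:4000")
    chat = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": "Say this is a test"}],
        max_tokens=7,
        temperature=0,
    )
    # the proxy should return a standard OpenAI-shaped chat response
    assert chat.choices[0].message.content is not None
```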

File diff suppressed because one or more lines are too long

View file

@@ -0,0 +1 @@
self.__BUILD_MANIFEST={__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/_error":["static/chunks/pages/_error-d6107f1aac0c574c.js"],sortedPages:["/_app","/_error"]},self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();

View file

@@ -0,0 +1 @@
self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()

File diff suppressed because one or more lines are too long

View file

@@ -1 +0,0 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d1ad37b1875df240.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a507ee9e75a3be72.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-589b47e7a69d316f.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d1ad37b1875df240.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/f8da5a6a5b29d249.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[90177,[\"798\",\"static/chunks/798-4baed68da0c5497d.js\",\"931\",\"static/chunks/app/page-37392d6753f8a3d0.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/f8da5a6a5b29d249.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"L9N6TOWJaqSp22Vj96YE4\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@@ -1,7 +0,0 @@
2:I[77831,[],""]
3:I[90177,["798","static/chunks/798-4baed68da0c5497d.js","931","static/chunks/app/page-37392d6753f8a3d0.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["L9N6TOWJaqSp22Vj96YE4",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/f8da5a6a5b29d249.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

View file

@@ -91,18 +91,19 @@ const CreateKey: React.FC<CreateKeyProps> = ({
} }
}; };
const sendSlackAlert = async () => {
try { const handleModelSelection = (selectedModels: string[]) => {
console.log("Sending Slack alert..."); if (selectedModels.includes("all_models")) {
const response = await slackBudgetAlertsHealthCheck(accessToken); // Select all models except "All Models"
console.log("slackBudgetAlertsHealthCheck Response:", response); const allModelsExceptAll = team ? team.models : userModels;
console.log("Testing Slack alert successful"); form.setFieldsValue({
} catch (error) { models: allModelsExceptAll
console.error("Error sending Slack alert:", error); });
} }
}; };
const handleCopy = () => { const handleCopy = () => {
message.success('API Key copied to clipboard'); message.success('API Key copied to clipboard');
}; };
@@ -153,8 +154,11 @@ const CreateKey: React.FC<CreateKeyProps> = ({
mode="multiple" mode="multiple"
placeholder="Select models" placeholder="Select models"
style={{ width: "100%" }} style={{ width: "100%" }}
onChange={(selectedModels) => handleModelSelection(selectedModels)}
> >
<Option key="all_models" value="all_models">
All Models
</Option>
{team && team.models ? ( {team && team.models ? (
team.models.map((model: string) => ( team.models.map((model: string) => (
<Option key={model} value={model}> <Option key={model} value={model}>

View file

@@ -216,6 +216,17 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
setIsDeleteModalOpen(true); setIsDeleteModalOpen(true);
}; };
const handleModelSelection = (selectedModels: string[]) => {
if (selectedModels.includes("all_models")) {
// Select all models except "All Models"
const allModelsExceptAll = userModels.filter(model => model !== "all");
form.setFieldsValue({
models: allModelsExceptAll
});
}
};
const confirmDelete = async () => { const confirmDelete = async () => {
if (teamToDelete == null || teams == null || accessToken == null) { if (teamToDelete == null || teams == null || accessToken == null) {
return; return;
@@ -473,7 +484,11 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
mode="multiple" mode="multiple"
placeholder="Select models" placeholder="Select models"
style={{ width: "100%" }} style={{ width: "100%" }}
onChange={(selectedModels) => handleModelSelection(selectedModels)}
> >
<Select2.Option key="all_models" value="all_models">
All Models
</Select2.Option>
{userModels.map((model) => ( {userModels.map((model) => (
<Select2.Option key={model} value={model}> <Select2.Option key={model} value={model}>
{model} {model}
@@ -481,6 +496,7 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
))} ))}
</Select2> </Select2>
</Form.Item> </Form.Item>
<Form.Item label="Max Budget (USD)" name="max_budget"> <Form.Item label="Max Budget (USD)" name="max_budget">
<InputNumber step={0.01} precision={2} width={200} /> <InputNumber step={0.01} precision={2} width={200} />
</Form.Item> </Form.Item>