forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_edit_teams
commit 58cc11a312
52 changed files with 1343 additions and 608 deletions
@@ -34,6 +34,7 @@ jobs:
  pip install "boto3>=1.28.57"
  pip install "aioboto3>=12.3.0"
  pip install langchain
+ pip install lunary==0.2.5
  pip install "langfuse>=2.0.0"
  pip install numpydoc
  pip install traceloop-sdk==0.0.69

34  README.md
@@ -25,6 +25,7 @@
</h4>

LiteLLM manages:

- Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints
- [Consistent output](https://docs.litellm.ai/docs/completion/output), text responses will always be available at `['choices'][0]['message']['content']`
- Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)
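The last bullet above points at the Router for retry/fallback across deployments. A minimal hedged sketch of that usage; the deployment names, keys, and API base/version values are placeholders, not part of this diff:

```python
from litellm import Router

# Two deployments registered under one alias; the Router retries and falls back between them.
router = Router(model_list=[
    {
        "model_name": "gpt-3.5-turbo",                         # alias callers use
        "litellm_params": {
            "model": "azure/my-azure-deployment",              # placeholder Azure deployment
            "api_key": "...",
            "api_base": "https://example.openai.azure.com",
            "api_version": "2023-07-01-preview",
        },
    },
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "..."},  # plain OpenAI fallback
    },
])

response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(response.choices[0].message.content)
```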
@@ -38,15 +39,14 @@ LiteLLM manages:
Support for more providers. Missing a provider or LLM Platform, raise a [feature request](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+).

# Usage ([**Docs**](https://docs.litellm.ai/docs/))

> [!IMPORTANT]
> LiteLLM v1.0.0 now requires `openai>=1.0.0`. Migration guide [here](https://docs.litellm.ai/docs/migration)

<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_Getting_Started.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

```shell
pip install litellm
```
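The next hunk picks up at `print(response)`, i.e. just past the README's basic-usage snippet, which this diff does not show. For orientation, a hedged sketch of that basic call (the key value is a placeholder):

```python
import os
from litellm import completion

os.environ["OPENAI_API_KEY"] = "your-openai-key"   # placeholder

messages = [{"content": "Hello, how are you?", "role": "user"}]

# openai call via the unified completion() interface
response = completion(model="gpt-3.5-turbo", messages=messages)
print(response)
```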
@@ -88,8 +88,10 @@ print(response)
```

## Streaming ([Docs](https://docs.litellm.ai/docs/completion/stream))

liteLLM supports streaming the model response back, pass `stream=True` to get a streaming iterator in response.
Streaming is supported for all models (Bedrock, Huggingface, TogetherAI, Azure, OpenAI, etc.)

```python
from litellm import completion
response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)

@@ -103,20 +105,22 @@ for part in response:
```

## Logging Observability ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
- LiteLLM exposes pre defined callbacks to send data to Langfuse, DynamoDB, s3 Buckets, LLMonitor, Helicone, Promptlayer, Traceloop, Athina, Slack
+ LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, DynamoDB, s3 Buckets, Helicone, Promptlayer, Traceloop, Athina, Slack

```python
from litellm import completion

## set env variables for logging tools
+ os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
- os.environ["LLMONITOR_APP_ID"] = "your-llmonitor-app-id"
os.environ["ATHINA_API_KEY"] = "your-athina-api-key"

os.environ["OPENAI_API_KEY"]

# set callbacks
- litellm.success_callback = ["langfuse", "llmonitor", "athina"] # log input/output to langfuse, llmonitor, supabase, athina etc
+ litellm.success_callback = ["lunary", "langfuse", "athina"] # log input/output to lunary, langfuse, supabase, athina etc

#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
```
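This hunk only wires up `success_callback`; the callbacks doc further down in this commit also registers a `failure_callback`. A hedged sketch of doing both with the new Lunary integration (keys are placeholders):

```python
import os
import litellm
from litellm import completion

os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"  # placeholder
os.environ["OPENAI_API_KEY"] = "sk-..."                      # placeholder

litellm.success_callback = ["lunary"]   # successful calls are logged to Lunary
litellm.failure_callback = ["lunary"]   # failed calls (auth errors, rate limits, ...) are logged too

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
)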
@@ -127,6 +131,7 @@ response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content
Set Budgets & Rate limits across multiple projects

The proxy provides:

1. [Hooks for auth](https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth)
2. [Hooks for logging](https://docs.litellm.ai/docs/proxy/logging#step-1---create-your-custom-litellm-callback-class)
3. [Cost tracking](https://docs.litellm.ai/docs/proxy/virtual_keys#tracking-spend)
@@ -141,6 +146,7 @@ pip install 'litellm[proxy]'
```

### Step 1: Start litellm proxy

```shell
$ litellm --model huggingface/bigcode/starcoder

@@ -148,6 +154,7 @@ $ litellm --model huggingface/bigcode/starcoder
```

### Step 2: Make ChatCompletions Request to Proxy

```python
import openai # openai v1.0.0+
client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:4000") # set proxy to base_url
@@ -163,6 +170,7 @@ print(response)
```

## Proxy Key Management ([Docs](https://docs.litellm.ai/docs/proxy/virtual_keys))

UI on `/ui` on your proxy server


@@ -170,6 +178,7 @@ Set budgets and rate limits across multiple projects
`POST /key/generate`

### Request

```shell
curl 'http://0.0.0.0:4000/key/generate' \
--header 'Authorization: Bearer sk-1234' \
@@ -178,6 +187,7 @@ curl 'http://0.0.0.0:4000/key/generate' \
```

### Expected Response

```shell
{
  "key": "sk-kdEXbIqZRwEeEiHwdg7sFA", # Bearer token
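A hedged sketch of what the generated key is for: pass it as the `api_key` (bearer token) when calling the proxy with the OpenAI client, exactly as in the ChatCompletions example above. The key value below is just the placeholder from the response sketch:

```python
import openai

# use the virtual key returned by /key/generate as the bearer token for the proxy
client = openai.OpenAI(
    api_key="sk-kdEXbIqZRwEeEiHwdg7sFA",   # key from the /key/generate response above
    base_url="http://0.0.0.0:4000",         # the litellm proxy
)

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "this request is billed against the virtual key"}],
)
print(response.choices[0].message.content)
```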
@@ -186,12 +196,13 @@ curl 'http://0.0.0.0:4000/key/generate' \
```

## Supported Providers ([Docs](https://docs.litellm.ai/docs/providers))

| Provider | [Completion](https://docs.litellm.ai/docs/#basic-usage) | [Streaming](https://docs.litellm.ai/docs/completion/stream#streaming-responses) | [Async Completion](https://docs.litellm.ai/docs/completion/stream#async-completion) | [Async Streaming](https://docs.litellm.ai/docs/completion/stream#async-streaming) | [Async Embedding](https://docs.litellm.ai/docs/embedding/supported_embedding) | [Async Image Generation](https://docs.litellm.ai/docs/image_generation) |
| ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- |
| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [aws - bedrock](https://docs.litellm.ai/docs/providers/bedrock) | ✅ | ✅ | ✅ | ✅ | ✅ |
| [google - vertex_ai [Gemini]](https://docs.litellm.ai/docs/providers/vertex) | ✅ | ✅ | ✅ | ✅ |
| [google - palm](https://docs.litellm.ai/docs/providers/palm) | ✅ | ✅ | ✅ | ✅ |
| [google AI Studio - gemini](https://docs.litellm.ai/docs/providers/gemini) | ✅ | | ✅ | | |
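The Async Completion / Async Streaming columns in the table refer to `acompletion`. A minimal hedged sketch (model and messages are placeholders):

```python
import asyncio
from litellm import acompletion

async def main():
    # async, non-streaming
    response = await acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello from asyncio"}],
    )
    print(response.choices[0].message.content)

    # async streaming: iterate over chunks as they arrive
    stream = await acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Stream this back"}],
        stream=True,
    )
    async for part in stream:
        print(part.choices[0].delta.content or "", end="")

asyncio.run(main())
```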
@@ -217,25 +228,28 @@ curl 'http://0.0.0.0:4000/key/generate' \
| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ |
| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ |

[**Read the Docs**](https://docs.litellm.ai/docs/)

## Contributing

To contribute: Clone the repo locally -> Make a change -> Submit a PR with the change.

Here's how to modify the repo locally:
Step 1: Clone the repo

```
git clone https://github.com/BerriAI/litellm.git
```

Step 2: Navigate into the project, and install dependencies:

```
cd litellm
poetry install
```

Step 3: Test your change:

```
cd litellm/tests # pwd: Documents/litellm/litellm/tests
poetry run flake8

@@ -243,6 +257,7 @@ poetry run pytest .
```

Step 4: Submit a PR with your changes! 🚀

- push your fork to your GitHub repo
- submit a PR from there

@@ -260,12 +275,14 @@ This covers:
- ✅ **Secure access with Single Sign-On**

# Support / talk with founders

- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

# Why did we build this

- **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI and Cohere.

# Contributors

@@ -282,4 +299,3 @@ This covers:
<a href="https://github.com/BerriAI/litellm/graphs/contributors">
  <img src="https://contrib.rocks/image?repo=BerriAI/litellm" />
</a>

348  cookbook/logging_observability/LiteLLM_Lunary.ipynb  vendored  Normal file
@@ -0,0 +1,348 @@ (new file)

## Use LiteLLM with Lunary

https://docs.litellm.ai/docs/observability/lunary_integration

## Install Dependencies

```python
%pip install litellm lunary
```

## Set Env Variables

```python
import litellm
from litellm import completion
import os

# from https://app.lunary.ai/
os.environ["LUNARY_PUBLIC_KEY"] = ""

# LLM provider keys
# You can use any of the litellm supported providers: https://docs.litellm.ai/docs/providers
os.environ['OPENAI_API_KEY'] = ""
```

## Set Lunary as a callback for sending data
## OpenAI completion call

```python
# set lunary as a callback, litellm will send the data to lunary
litellm.success_callback = ["lunary"]

# openai call
response = completion(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "user", "content": "Hi 👋 - i'm openai"}
    ]
)

print(response)
```

Output (stdout):

```
[Choices(finish_reason='stop', index=0, message=Message(content='Hello! How can I assist you today?', role='assistant'))]ModelResponse(id='chatcmpl-8xIWykI0GiJSmYtXYuB8Z363kpIBm', choices=[Choices(finish_reason='stop', index=0, message=Message(content='Hello! How can I assist you today?', role='assistant'))], created=1709143276, model='gpt-3.5-turbo-0125', object='chat.completion', system_fingerprint='fp_86156a94a0', usage=Usage(completion_tokens=9, prompt_tokens=15, total_tokens=24))

[Lunary] Add event: {
  "event": "start",
  "type": "llm",
  "name": "gpt-3.5-turbo",
  "runId": "a363776a-bd07-4474-bce2-193067f01b2e",
  "timestamp": "2024-02-28T18:01:15.188153+00:00",
  "input": {
    "role": "user",
    "content": "Hi 👋 - i'm openai"
  },
  "extra": {},
  "runtime": "litellm",
  "metadata": {}
}

[Lunary] Add event: {
  "event": "end",
  "type": "llm",
  "runId": "a363776a-bd07-4474-bce2-193067f01b2e",
  "timestamp": "2024-02-28T18:01:16.846581+00:00",
  "output": {
    "role": "assistant",
    "content": "Hello! How can I assist you today?"
  },
  "runtime": "litellm",
  "tokensUsage": {
    "completion": 9,
    "prompt": 15
  }
}
```

Output (stderr): a `--- Logging error ---` traceback captured in the notebook — the Lunary consumer's batch upload failed with `requests.exceptions.ReadTimeout: HTTPConnectionPool(host='localhost', port=3333): Read timed out. (read timeout=5)`, and the subsequent `logging.error("[Lunary] Error sending events", e)` call in `lunary/consumer.py` raised `TypeError: not all arguments converted during string formatting`.

# Using LiteLLM with Lunary Templates

You can use LiteLLM seamlessly with Lunary templates to manage your prompts and completions.

Assuming you have created a template "test-template" with a variable "question", you can use it like this:

```python
import lunary
from litellm import completion

template = lunary.render_template("test-template", {"question": "Hello!"})

response = completion(**template)

print(response)
```

Output (stdout):

```
[Choices(finish_reason='stop', index=0, message=Message(content='Hello! How can I assist you today?', role='assistant'))]ModelResponse(id='chatcmpl-8xIXegwpudg4YKnLB6pmpFGXqTHcH', choices=[Choices(finish_reason='stop', index=0, message=Message(content='Hello! How can I assist you today?', role='assistant'))], created=1709143318, model='gpt-4-0125-preview', object='chat.completion', system_fingerprint='fp_c8aa5a06d6', usage=Usage(completion_tokens=9, prompt_tokens=21, total_tokens=30))

[Lunary] Add event: {
  "event": "start",
  "type": "llm",
  "name": "gpt-4-turbo-preview",
  "runId": "3a5b698d-cb55-4b3b-ab6d-04d2b99e40cb",
  "timestamp": "2024-02-28T18:01:56.746249+00:00",
  "input": [
    {
      "role": "system",
      "content": "You are an helpful assistant."
    },
    {
      "role": "user",
      "content": "Hi! Hello!"
    }
  ],
  "extra": {
    "temperature": 1,
    "max_tokens": 100
  },
  "runtime": "litellm",
  "metadata": {}
}

[Lunary] Add event: {
  "event": "end",
  "type": "llm",
  "runId": "3a5b698d-cb55-4b3b-ab6d-04d2b99e40cb",
  "timestamp": "2024-02-28T18:01:58.741244+00:00",
  "output": {
    "role": "assistant",
    "content": "Hello! How can I assist you today?"
  },
  "runtime": "litellm",
  "tokensUsage": {
    "completion": 9,
    "prompt": 21
  }
}
```

Notebook metadata: Colab provenance; kernel "Python 3"; language_info: Python 3.12.2 (ipython3); nbformat 4.0.
@@ -33,7 +33,7 @@
- Call all models using the OpenAI format - `completion(model, messages)`
- Text responses will always be available at `['choices'][0]['message']['content']`
- **Error Handling** Using Model Fallbacks (if `GPT-4` fails, try `llama2`)
- - **Logging** - Log Requests, Responses and Errors to `Supabase`, `Posthog`, `Mixpanel`, `Sentry`, `LLMonitor`,`Athina`, `Helicone` (Any of the supported providers here: https://litellm.readthedocs.io/en/latest/advanced/
+ - **Logging** - Log Requests, Responses and Errors to `Supabase`, `Posthog`, `Mixpanel`, `Sentry`, `Lunary`,`Athina`, `Helicone` (Any of the supported providers here: https://litellm.readthedocs.io/en/latest/advanced/

**Example: Logs sent to Supabase**
<img width="1015" alt="Screenshot 2023-08-11 at 4 02 46 PM" src="https://github.com/ishaan-jaff/proxy-server/assets/29436595/237557b8-ba09-4917-982c-8f3e1b2c8d08">
@@ -30,13 +30,15 @@ Email us @ krrish@berri.ai
Next Steps 👉 [Call all supported models - e.g. Claude-2, Llama2-70b, etc.](./proxy_api.md#supported-models)

More details 👉
- * [Completion() function details](./completion/)
- * [All supported models / providers on LiteLLM](./providers/)
- * [Build your own OpenAI proxy](https://github.com/BerriAI/liteLLM-proxy/tree/main)
+ - [Completion() function details](./completion/)
+ - [All supported models / providers on LiteLLM](./providers/)
+ - [Build your own OpenAI proxy](https://github.com/BerriAI/liteLLM-proxy/tree/main)

## streaming

Same example from before. Just pass in `stream=True` in the completion args.

```python
from litellm import completion

@@ -56,8 +58,9 @@ print(response)
```

More details 👉
- * [streaming + async](./completion/stream.md)
- * [tutorial for streaming Llama2 on TogetherAI](./tutorials/TogetherAI_liteLLM.md)
+ - [streaming + async](./completion/stream.md)
+ - [tutorial for streaming Llama2 on TogetherAI](./tutorials/TogetherAI_liteLLM.md)

## exception handling

@@ -76,25 +79,28 @@ except OpenAIError as e:
```

## Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
- LiteLLM exposes pre defined callbacks to send data to Langfuse, LLMonitor, Helicone, Promptlayer, Traceloop, Slack
+ LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack

```python
from litellm import completion

## set env variables for logging tools
+ os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
- os.environ["LLMONITOR_APP_ID"] = "your-llmonitor-app-id"

os.environ["OPENAI_API_KEY"]

# set callbacks
- litellm.success_callback = ["langfuse", "llmonitor"] # log input/output to langfuse, llmonitor, supabase
+ litellm.success_callback = ["lunary", "langfuse"] # log input/output to langfuse, lunary, supabase

#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
```

More details 👉
- * [exception mapping](./exception_mapping.md)
- * [retries + model fallbacks for completion()](./completion/reliable_completions.md)
- * [tutorial for model fallbacks with completion()](./tutorials/fallbacks.md)
+ - [exception mapping](./exception_mapping.md)
+ - [retries + model fallbacks for completion()](./completion/reliable_completions.md)
+ - [tutorial for model fallbacks with completion()](./tutorials/fallbacks.md)
@@ -5,7 +5,6 @@ import TabItem from '@theme/TabItem';

https://github.com/BerriAI/litellm

## **Call 100+ LLMs using the same Input/Output Format**

- Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints

@@ -21,6 +20,7 @@ You can use litellm through either:
## LiteLLM Python SDK

### Basic usage

<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_Getting_Started.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

@@ -28,6 +28,7 @@ You can use litellm through either:
```shell
pip install litellm
```

<Tabs>
<TabItem value="openai" label="OpenAI">

@@ -120,7 +121,6 @@ response = completion(

</TabItem>

<TabItem value="ollama" label="Ollama">

```python

@@ -132,6 +132,7 @@ response = completion(
    api_base="http://localhost:11434"
)
```

</TabItem>
<TabItem value="or" label="Openrouter">

@@ -147,12 +148,14 @@ response = completion(
    messages = [{ "content": "Hello, how are you?","role": "user"}],
)
```

</TabItem>

</Tabs>

### Streaming
Set `stream=True` in the `completion` args.

<Tabs>
<TabItem value="openai" label="OpenAI">
@@ -250,7 +253,6 @@ response = completion(

</TabItem>

<TabItem value="ollama" label="Ollama">

```python

@@ -263,6 +265,7 @@ response = completion(
    stream=True,
)
```

</TabItem>
<TabItem value="or" label="Openrouter">

@@ -279,6 +282,7 @@ response = completion(
    stream=True,
)
```

</TabItem>

</Tabs>

@@ -300,19 +304,20 @@ except OpenAIError as e:
```

### Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
- LiteLLM exposes pre defined callbacks to send data to Langfuse, LLMonitor, Helicone, Promptlayer, Traceloop, Slack
+ LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack

```python
from litellm import completion

## set env variables for logging tools
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
- os.environ["LLMONITOR_APP_ID"] = "your-llmonitor-app-id"
+ os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"

os.environ["OPENAI_API_KEY"]

# set callbacks
- litellm.success_callback = ["langfuse", "llmonitor"] # log input/output to langfuse, llmonitor, supabase
+ litellm.success_callback = ["lunary", "langfuse"] # log input/output to lunary, langfuse, supabase

#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
@@ -358,6 +363,7 @@ Track spend across multiple projects/people


The proxy provides:

1. [Hooks for auth](https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth)
2. [Hooks for logging](https://docs.litellm.ai/docs/proxy/logging#step-1---create-your-custom-litellm-callback-class)
3. [Cost tracking](https://docs.litellm.ai/docs/proxy/virtual_keys#tracking-spend)

@@ -372,6 +378,7 @@ pip install 'litellm[proxy]'
```

#### Step 1: Start litellm proxy

```shell
$ litellm --model huggingface/bigcode/starcoder

@@ -379,6 +386,7 @@ $ litellm --model huggingface/bigcode/starcoder
```

#### Step 2: Make ChatCompletions Request to Proxy

```python
import openai # openai v1.0.0+
client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:4000") # set proxy to base_url

@@ -394,6 +402,7 @@ print(response)
```

## More details
- * [exception mapping](./exception_mapping.md)
- * [retries + model fallbacks for completion()](./completion/reliable_completions.md)
- * [proxy virtual keys & spend management](./tutorials/fallbacks.md)
+ - [exception mapping](./exception_mapping.md)
+ - [retries + model fallbacks for completion()](./completion/reliable_completions.md)
+ - [proxy virtual keys & spend management](./tutorials/fallbacks.md)
@@ -7,7 +7,7 @@ liteLLM provides `input_callbacks`, `success_callbacks` and `failure_callbacks`,
liteLLM supports:

- [Custom Callback Functions](https://docs.litellm.ai/docs/observability/custom_callback)
- - [LLMonitor](https://llmonitor.com/docs)
+ - [Lunary](https://lunary.ai/docs)
- [Helicone](https://docs.helicone.ai/introduction)
- [Traceloop](https://traceloop.com/docs)
- [Athina](https://docs.athina.ai/)

@@ -22,15 +22,15 @@ from litellm import completion

# set callbacks
litellm.input_callback=["sentry"] # for sentry breadcrumbing - logs the input being sent to the api
- litellm.success_callback=["posthog", "helicone", "llmonitor", "athina"]
+ litellm.success_callback=["posthog", "helicone", "lunary", "athina"]
- litellm.failure_callback=["sentry", "llmonitor"]
+ litellm.failure_callback=["sentry", "lunary"]

## set env variables
os.environ['SENTRY_DSN'], os.environ['SENTRY_API_TRACE_RATE']= ""
os.environ['POSTHOG_API_KEY'], os.environ['POSTHOG_API_URL'] = "api-key", "api-url"
os.environ["HELICONE_API_KEY"] = ""
os.environ["TRACELOOP_API_KEY"] = ""
- os.environ["LLMONITOR_APP_ID"] = ""
+ os.environ["LUNARY_PUBLIC_KEY"] = ""
os.environ["ATHINA_API_KEY"] = ""

response = completion(model="gpt-3.5-turbo", messages=messages)
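The callbacks list above also includes [Custom Callback Functions]. A hedged sketch of wiring one up alongside the string-named integrations; the function body is illustrative and the exact callback signature is taken on the assumption that it matches the custom-callback docs linked above:

```python
import litellm
from litellm import completion

def track_latency_callback(kwargs, completion_response, start_time, end_time):
    # assumption: documented custom-callback signature;
    # kwargs carries the request params, completion_response the model output
    print("model:", kwargs.get("model"))
    print("latency (s):", (end_time - start_time).total_seconds())

# custom callables can sit next to the named integrations
litellm.success_callback = [track_latency_callback, "lunary"]

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi 👋"}],
)
```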
@@ -1,65 +0,0 @@ (deleted file)
# LLMonitor Tutorial

[LLMonitor](https://llmonitor.com/) is an open-source observability platform that provides cost tracking, user tracking and powerful agent tracing.

<video controls width='900' >
  <source src='https://llmonitor.com/videos/demo-annotated.mp4'/>
</video>

## Use LLMonitor to log requests across all LLM Providers (OpenAI, Azure, Anthropic, Cohere, Replicate, PaLM)

liteLLM provides `callbacks`, making it easy for you to log data depending on the status of your responses.

:::info
We want to learn how we can make the callbacks better! Meet the [founders](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) or
join our [discord](https://discord.gg/wuPM9dRgDw)
:::

### Using Callbacks

First, sign up to get an app ID on the [LLMonitor dashboard](https://llmonitor.com).

Use just 2 lines of code, to instantly log your responses **across all providers** with llmonitor:

```python
litellm.success_callback = ["llmonitor"]
litellm.failure_callback = ["llmonitor"]
```

Complete code

```python
from litellm import completion

## set env variables
os.environ["LLMONITOR_APP_ID"] = "your-llmonitor-app-id"
# Optional: os.environ["LLMONITOR_API_URL"] = "self-hosting-url"

os.environ["OPENAI_API_KEY"], os.environ["COHERE_API_KEY"] = "", ""

# set callbacks
litellm.success_callback = ["llmonitor"]
litellm.failure_callback = ["llmonitor"]

#openai call
response = completion(
  model="gpt-3.5-turbo",
  messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
  user="ishaan_litellm"
)

#cohere call
response = completion(
  model="command-nightly",
  messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}],
  user="ishaan_litellm"
)
```

## Support & Talk to Founders

- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
- Meet the LLMonitor team on [Discord](http://discord.com/invite/8PafSG58kK) or via [email](mailto:vince@llmonitor.com).

82  docs/my-website/docs/observability/lunary_integration.md  Normal file
@@ -0,0 +1,82 @@ (new file)
# Lunary - Logging and tracing LLM input/output

[Lunary](https://lunary.ai/) is an open-source AI developer platform providing observability, prompt management, and evaluation tools for AI developers.

<video controls width='900' >
  <source src='https://lunary.ai/videos/demo-annotated.mp4'/>
</video>

## Use Lunary to log requests across all LLM Providers (OpenAI, Azure, Anthropic, Cohere, Replicate, PaLM)

liteLLM provides `callbacks`, making it easy for you to log data depending on the status of your responses.

:::info
We want to learn how we can make the callbacks better! Meet the [founders](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) or
join our [discord](https://discord.gg/wuPM9dRgDw)
:::

### Using Callbacks

First, sign up to get a public key on the [Lunary dashboard](https://lunary.ai).

Use just 2 lines of code, to instantly log your responses **across all providers** with lunary:

```python
litellm.success_callback = ["lunary"]
litellm.failure_callback = ["lunary"]
```

Complete code

```python
from litellm import completion

## set env variables
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"

os.environ["OPENAI_API_KEY"] = ""

# set callbacks
litellm.success_callback = ["lunary"]
litellm.failure_callback = ["lunary"]

#openai call
response = completion(
  model="gpt-3.5-turbo",
  messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
  user="ishaan_litellm"
)
```

## Templates

You can use Lunary to manage prompt templates and use them across all your LLM providers.

Make sure to have `lunary` installed:

```bash
pip install lunary
```

Then, use the following code to pull templates from Lunary:

```python
from litellm import completion
import lunary

template = lunary.render_template("template-slug", {
  "name": "John", # Inject variables
})

litellm.success_callback = ["lunary"]

result = completion(**template)
```

## Support & Talk to Founders

- Meet the Lunary team via [email](mailto:hello@lunary.ai).
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
@@ -175,6 +175,15 @@ print(response)

## Usage - Function Calling

+ :::info
+ Claude returns its output as an XML Tree. [Here is how we translate it](https://github.com/BerriAI/litellm/blob/49642a5b00a53b1babc1a753426a8afcac85dbbe/litellm/llms/prompt_templates/factory.py#L734).
+ You can see the raw response via `response._hidden_params["original_response"]`.
+ Claude hallucinates, e.g. returning the list param `value` as `<value>\n<item>apple</item>\n<item>banana</item>\n</value>` or `<value>\n<list>\n<item>apple</item>\n<item>banana</item>\n</list>\n</value>`.
+ :::

```python
from litellm import completion
@@ -146,6 +146,15 @@ print(response)

## Usage - Function Calling

+ :::info
+ Claude returns its output as an XML Tree. [Here is how we translate it](https://github.com/BerriAI/litellm/blob/49642a5b00a53b1babc1a753426a8afcac85dbbe/litellm/llms/prompt_templates/factory.py#L734).
+ You can see the raw response via `response._hidden_params["original_response"]`.
+ Claude hallucinates, e.g. returning the list param `value` as `<value>\n<item>apple</item>\n<item>banana</item>\n</value>` or `<value>\n<list>\n<item>apple</item>\n<item>banana</item>\n</list>\n</value>`.
+ :::

```python
from litellm import completion
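For context on the two function-calling hunks above, a hedged sketch of an OpenAI-format tool call through `completion`, plus where the raw XML provider response mentioned in the info block can be inspected. The tool definition and the Claude model name are placeholders, not taken from this diff:

```python
from litellm import completion

tools = [{
    "type": "function",
    "function": {
        "name": "get_current_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "type": "object",
            "properties": {"location": {"type": "string"}},
            "required": ["location"],
        },
    },
}]

response = completion(
    model="anthropic/claude-3-opus-20240229",   # placeholder Claude model name
    messages=[{"role": "user", "content": "What's the weather in Boston?"}],
    tools=tools,
)

# translated, OpenAI-style tool call
print(response.choices[0].message.tool_calls)

# raw provider response (the XML tree referenced in the hunks above)
print(response._hidden_params["original_response"])
```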
@@ -176,8 +176,7 @@ general_settings:
  master_key: sk-1234

litellm_settings:
-   max_budget: 10 # global budget for proxy
-   max_user_budget: 0.0001 # budget for 'user' passed to /chat/completions
+   max_end_user_budget: 0.0001 # budget for 'user' passed to /chat/completions
```

2. Make a /chat/completions call, pass 'user' - First call Works
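A hedged sketch of that step-2 call: the OpenAI client pointed at the proxy, passing `user` so the `max_end_user_budget` from the config above is enforced against that end user. The end-user id is a placeholder; the key is the master key from the config sketch:

```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")  # proxy master key from the config above

# the 'user' field is what the per-end-user budget is tracked against
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "first call - this one works"}],
    user="end-user-1",   # placeholder end-user id
)
print(response.choices[0].message.content)
```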
@@ -22,10 +22,10 @@ const sidebars = {
      type: "category",
      label: "💥 OpenAI Proxy Server",
      link: {
-       type: 'generated-index',
+       type: "generated-index",
-       title: '💥 OpenAI Proxy Server',
+       title: "💥 OpenAI Proxy Server",
        description: `Proxy Server to call 100+ LLMs in a unified interface & track spend, set budgets per virtual key/user`,
-       slug: '/simple_proxy',
+       slug: "/simple_proxy",
      },
      items: [
        "proxy/quick_start",

@@ -33,9 +33,9 @@ const sidebars = {
        "proxy/prod",
        "proxy/configs",
        {
-         type: 'link',
+         type: "link",
-         label: '📖 All Endpoints',
+         label: "📖 All Endpoints",
-         href: 'https://litellm-api.up.railway.app/',
+         href: "https://litellm-api.up.railway.app/",
        },
        "proxy/enterprise",
        "proxy/user_keys",

@@ -46,12 +46,9 @@ const sidebars = {
        "proxy/cost_tracking",
        "proxy/token_auth",
        {
-         "type": "category",
-         "label": "🔥 Load Balancing",
-         "items": [
-           "proxy/load_balancing",
-           "proxy/reliability",
-         ]
+         type: "category",
+         label: "🔥 Load Balancing",
+         items: ["proxy/load_balancing", "proxy/reliability"],
        },
        "proxy/model_management",
        "proxy/health",

@@ -60,13 +57,9 @@ const sidebars = {
        "proxy/prompt_injection",
        "proxy/caching",
        {
-         "type": "category",
-         "label": "Logging, Alerting",
-         "items": [
-           "proxy/logging",
-           "proxy/alerting",
-           "proxy/streaming_logging",
-         ]
+         type: "category",
+         label: "Logging, Alerting",
+         items: ["proxy/logging", "proxy/alerting", "proxy/streaming_logging"],
        },
        "proxy/grafana_metrics",
        "proxy/call_hooks",
@ -78,10 +71,10 @@ const sidebars = {
|
||||||
type: "category",
|
type: "category",
|
||||||
label: "Completion()",
|
label: "Completion()",
|
||||||
link: {
|
link: {
|
||||||
type: 'generated-index',
|
type: "generated-index",
|
||||||
title: 'Completion()',
|
title: "Completion()",
|
||||||
description: 'Details on the completion() function',
|
description: "Details on the completion() function",
|
||||||
slug: '/completion',
|
slug: "/completion",
|
||||||
},
|
},
|
||||||
items: [
|
items: [
|
||||||
"completion/input",
|
"completion/input",
|
||||||
|
@ -112,10 +105,11 @@ const sidebars = {
|
||||||
type: "category",
|
type: "category",
|
||||||
label: "Supported Models & Providers",
|
label: "Supported Models & Providers",
|
||||||
link: {
|
link: {
|
||||||
type: 'generated-index',
|
type: "generated-index",
|
||||||
title: 'Providers',
|
title: "Providers",
|
||||||
description: 'Learn how to deploy + call models from different providers on LiteLLM',
|
description:
|
||||||
slug: '/providers',
|
"Learn how to deploy + call models from different providers on LiteLLM",
|
||||||
|
slug: "/providers",
|
||||||
},
|
},
|
||||||
items: [
|
items: [
|
||||||
"providers/openai",
|
"providers/openai",
|
||||||
|
@ -150,7 +144,7 @@ const sidebars = {
|
||||||
"providers/openrouter",
|
"providers/openrouter",
|
||||||
"providers/custom_openai_proxy",
|
"providers/custom_openai_proxy",
|
||||||
"providers/petals",
|
"providers/petals",
|
||||||
]
|
],
|
||||||
},
|
},
|
||||||
"proxy/custom_pricing",
|
"proxy/custom_pricing",
|
||||||
"routing",
|
"routing",
|
||||||
|
@ -165,9 +159,10 @@ const sidebars = {
|
||||||
type: "category",
|
type: "category",
|
||||||
label: "Logging & Observability",
|
label: "Logging & Observability",
|
||||||
items: [
|
items: [
|
||||||
'debugging/local_debugging',
|
"debugging/local_debugging",
|
||||||
"observability/callbacks",
|
"observability/callbacks",
|
||||||
"observability/custom_callback",
|
"observability/custom_callback",
|
||||||
|
"observability/lunary_integration",
|
||||||
"observability/langfuse_integration",
|
"observability/langfuse_integration",
|
||||||
"observability/sentry",
|
"observability/sentry",
|
||||||
"observability/promptlayer_integration",
|
"observability/promptlayer_integration",
|
||||||
|
@ -176,7 +171,6 @@ const sidebars = {
|
||||||
"observability/slack_integration",
|
"observability/slack_integration",
|
||||||
"observability/traceloop_integration",
|
"observability/traceloop_integration",
|
||||||
"observability/athina_integration",
|
"observability/athina_integration",
|
||||||
"observability/llmonitor_integration",
|
|
||||||
"observability/helicone_integration",
|
"observability/helicone_integration",
|
||||||
"observability/supabase_integration",
|
"observability/supabase_integration",
|
||||||
`observability/telemetry`,
|
`observability/telemetry`,
|
||||||
|
@ -184,19 +178,19 @@ const sidebars = {
|
||||||
},
|
},
|
||||||
"caching/redis_cache",
|
"caching/redis_cache",
|
||||||
{
|
{
|
||||||
type: 'category',
|
type: "category",
|
||||||
label: 'Tutorials',
|
label: "Tutorials",
|
||||||
items: [
|
items: [
|
||||||
'tutorials/azure_openai',
|
'tutorials/azure_openai',
|
||||||
'tutorials/instructor',
|
'tutorials/instructor',
|
||||||
'tutorials/oobabooga',
|
'tutorials/oobabooga',
|
||||||
"tutorials/gradio_integration",
|
"tutorials/gradio_integration",
|
||||||
'tutorials/huggingface_codellama',
|
"tutorials/huggingface_codellama",
|
||||||
'tutorials/huggingface_tutorial',
|
"tutorials/huggingface_tutorial",
|
||||||
'tutorials/TogetherAI_liteLLM',
|
"tutorials/TogetherAI_liteLLM",
|
||||||
'tutorials/finetuned_chat_gpt',
|
"tutorials/finetuned_chat_gpt",
|
||||||
'tutorials/sagemaker_llms',
|
"tutorials/sagemaker_llms",
|
||||||
'tutorials/text_completion',
|
"tutorials/text_completion",
|
||||||
"tutorials/first_playground",
|
"tutorials/first_playground",
|
||||||
"tutorials/model_fallbacks",
|
"tutorials/model_fallbacks",
|
||||||
],
|
],
|
||||||
|
@ -204,24 +198,23 @@ const sidebars = {
|
||||||
{
|
{
|
||||||
type: "category",
|
type: "category",
|
||||||
label: "LangChain, LlamaIndex Integration",
|
label: "LangChain, LlamaIndex Integration",
|
||||||
items: [
|
items: ["langchain/langchain"],
|
||||||
"langchain/langchain"
|
|
||||||
],
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
type: 'category',
|
type: "category",
|
||||||
label: 'Extras',
|
label: "Extras",
|
||||||
items: [
|
items: [
|
||||||
'extras/contributing',
|
"extras/contributing",
|
||||||
"proxy_server",
|
"proxy_server",
|
||||||
{
|
{
|
||||||
type: "category",
|
type: "category",
|
||||||
label: "❤️ 🚅 Projects built on LiteLLM",
|
label: "❤️ 🚅 Projects built on LiteLLM",
|
||||||
link: {
|
link: {
|
||||||
type: 'generated-index',
|
type: "generated-index",
|
||||||
title: 'Projects built on LiteLLM',
|
title: "Projects built on LiteLLM",
|
||||||
description: 'Learn how to deploy + call models from different providers on LiteLLM',
|
description:
|
||||||
slug: '/project',
|
"Learn how to deploy + call models from different providers on LiteLLM",
|
||||||
|
slug: "/project",
|
||||||
},
|
},
|
||||||
items: [
|
items: [
|
||||||
"projects/Docq.AI",
|
"projects/Docq.AI",
|
||||||
|
@ -237,7 +230,7 @@ const sidebars = {
|
||||||
"projects/GPT Migrate",
|
"projects/GPT Migrate",
|
||||||
"projects/YiVal",
|
"projects/YiVal",
|
||||||
"projects/LiteLLM Proxy",
|
"projects/LiteLLM Proxy",
|
||||||
]
|
],
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
|
@ -5,7 +5,6 @@ import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
https://github.com/BerriAI/litellm
|
https://github.com/BerriAI/litellm
|
||||||
|
|
||||||
|
|
||||||
## **Call 100+ LLMs using the same Input/Output Format**
|
## **Call 100+ LLMs using the same Input/Output Format**
|
||||||
|
|
||||||
- Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints
|
- Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints
|
||||||
|
@ -14,6 +13,7 @@ https://github.com/BerriAI/litellm
|
||||||
- Track spend & set budgets per project [OpenAI Proxy Server](https://docs.litellm.ai/docs/simple_proxy)
|
- Track spend & set budgets per project [OpenAI Proxy Server](https://docs.litellm.ai/docs/simple_proxy)
|
||||||
|
|
||||||
## Basic usage
|
## Basic usage
|
||||||
|
|
||||||
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_Getting_Started.ipynb">
|
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_Getting_Started.ipynb">
|
||||||
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
||||||
</a>
|
</a>
|
||||||
|
@ -21,6 +21,7 @@ https://github.com/BerriAI/litellm
|
||||||
```shell
|
```shell
|
||||||
pip install litellm
|
pip install litellm
|
||||||
```
|
```
|
||||||
|
|
||||||
<Tabs>
|
<Tabs>
|
||||||
<TabItem value="openai" label="OpenAI">
|
<TabItem value="openai" label="OpenAI">
|
||||||
|
|
||||||
|
@ -113,7 +114,6 @@ response = completion(
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
|
|
||||||
|
|
||||||
<TabItem value="ollama" label="Ollama">
|
<TabItem value="ollama" label="Ollama">
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
@ -125,6 +125,7 @@ response = completion(
|
||||||
api_base="http://localhost:11434"
|
api_base="http://localhost:11434"
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
<TabItem value="or" label="Openrouter">
|
<TabItem value="or" label="Openrouter">
|
||||||
|
|
||||||
|
@ -140,11 +141,13 @@ response = completion(
|
||||||
messages = [{ "content": "Hello, how are you?","role": "user"}],
|
messages = [{ "content": "Hello, how are you?","role": "user"}],
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
|
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
## Streaming
|
## Streaming
|
||||||
|
|
||||||
Set `stream=True` in the `completion` args.
|
Set `stream=True` in the `completion` args.
|
||||||
<Tabs>
|
<Tabs>
|
||||||
<TabItem value="openai" label="OpenAI">
|
<TabItem value="openai" label="OpenAI">
|
||||||
|
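For reference, consuming a streamed response is just iteration over the returned object, as in this sketch (model and prompt are placeholders):

```python
from litellm import completion

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
    stream=True,
)

# each chunk is an OpenAI-style streaming delta
for chunk in response:
    print(chunk.choices[0].delta.content or "", end="")
```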
@ -243,7 +246,6 @@ response = completion(
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
|
|
||||||
|
|
||||||
<TabItem value="ollama" label="Ollama">
|
<TabItem value="ollama" label="Ollama">
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
@ -256,6 +258,7 @@ response = completion(
|
||||||
stream=True,
|
stream=True,
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
<TabItem value="or" label="Openrouter">
|
<TabItem value="or" label="Openrouter">
|
||||||
|
|
||||||
|
@ -272,6 +275,7 @@ response = completion(
|
||||||
stream=True,
|
stream=True,
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
|
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
@@ -293,25 +297,28 @@ except OpenAIError as e:
 ```
 
 ## Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
-LiteLLM exposes pre defined callbacks to send data to Langfuse, LLMonitor, Helicone, Promptlayer, Traceloop, Slack
+
+LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack
 
 ```python
 from litellm import completion
 
 ## set env variables for logging tools
 os.environ["LANGFUSE_PUBLIC_KEY"] = ""
 os.environ["LANGFUSE_SECRET_KEY"] = ""
-os.environ["LLMONITOR_APP_ID"] = "your-llmonitor-app-id"
+os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
 
 os.environ["OPENAI_API_KEY"]
 
 # set callbacks
-litellm.success_callback = ["langfuse", "llmonitor"] # log input/output to langfuse, llmonitor, supabase
+litellm.success_callback = ["langfuse", "lunary"] # log input/output to lunary, langfuse, supabase
 
 #openai call
 response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
 ```
 
 ## Track Costs, Usage, Latency for streaming
 
 Use a callback function for this - more info on custom callbacks: https://docs.litellm.ai/docs/observability/custom_callback
 
 ```python
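# A rough sketch of such a callback, assuming litellm still calls success_callback
# functions with (kwargs, completion_response, start_time, end_time); the names
# below are illustrative, not taken from this diff.
import litellm
from litellm import completion

def track_cost_callback(kwargs, completion_response, start_time, end_time):
    try:
        # for streaming calls, litellm exposes the stitched-together response in kwargs
        full_response = kwargs.get("complete_streaming_response") or completion_response
        cost = litellm.completion_cost(completion_response=full_response)
        print(f"cost: ${cost:.6f}, latency: {end_time - start_time}")
    except Exception as e:
        print(f"could not compute cost: {e}")

litellm.success_callback = [track_cost_callback]

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi 👋"}],
    stream=True,
)
for chunk in response:
    pass  # consume the stream; the callback fires once it completes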
@ -351,6 +358,7 @@ Track spend across multiple projects/people
|
||||||

|

|
||||||
|
|
||||||
The proxy provides:
|
The proxy provides:
|
||||||
|
|
||||||
1. [Hooks for auth](https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth)
|
1. [Hooks for auth](https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth)
|
||||||
2. [Hooks for logging](https://docs.litellm.ai/docs/proxy/logging#step-1---create-your-custom-litellm-callback-class)
|
2. [Hooks for logging](https://docs.litellm.ai/docs/proxy/logging#step-1---create-your-custom-litellm-callback-class)
|
||||||
3. [Cost tracking](https://docs.litellm.ai/docs/proxy/virtual_keys#tracking-spend)
|
3. [Cost tracking](https://docs.litellm.ai/docs/proxy/virtual_keys#tracking-spend)
|
||||||
|
@ -365,6 +373,7 @@ pip install 'litellm[proxy]'
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Step 1: Start litellm proxy
|
#### Step 1: Start litellm proxy
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
$ litellm --model huggingface/bigcode/starcoder
|
$ litellm --model huggingface/bigcode/starcoder
|
||||||
|
|
||||||
|
@ -372,6 +381,7 @@ $ litellm --model huggingface/bigcode/starcoder
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Step 2: Make ChatCompletions Request to Proxy
|
#### Step 2: Make ChatCompletions Request to Proxy
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import openai # openai v1.0.0+
|
import openai # openai v1.0.0+
|
||||||
client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:8000") # set proxy to base_url
|
client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:8000") # set proxy to base_url
|
||||||
|
@@ -387,6 +397,7 @@ print(response)
 ```
 
 ## More details
-* [exception mapping](./exception_mapping.md)
-* [retries + model fallbacks for completion()](./completion/reliable_completions.md)
-* [proxy virtual keys & spend management](./tutorials/fallbacks.md)
+- [exception mapping](./exception_mapping.md)
+- [retries + model fallbacks for completion()](./completion/reliable_completions.md)
+- [proxy virtual keys & spend management](./tutorials/fallbacks.md)
@@ -6,7 +6,7 @@ liteLLM provides `success_callbacks` and `failure_callbacks`, making it easy for
 
 liteLLM supports:
 
-- [LLMonitor](https://llmonitor.com/docs)
+- [Lunary](https://lunary.ai/docs)
 - [Helicone](https://docs.helicone.ai/introduction)
 - [Sentry](https://docs.sentry.io/platforms/python/)
 - [PostHog](https://posthog.com/docs/libraries/python)
@@ -18,8 +18,8 @@ liteLLM supports:
 from litellm import completion
 
 # set callbacks
-litellm.success_callback=["posthog", "helicone", "llmonitor"]
+litellm.success_callback=["posthog", "helicone", "lunary"]
-litellm.failure_callback=["sentry", "llmonitor"]
+litellm.failure_callback=["sentry", "lunary"]
 
 ## set env variables
 os.environ['SENTRY_DSN'], os.environ['SENTRY_API_TRACE_RATE']= ""
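A short usage sketch of the callbacks above: once the callback lists and environment variables are set, every completion call is logged on success, and exceptions hit the failure callbacks. The Helicone and PostHog variable names below are assumptions; only the Lunary key name comes from these docs.

```python
import os
import litellm
from litellm import completion

# set callbacks (as in the doc above)
litellm.success_callback = ["posthog", "helicone", "lunary"]
litellm.failure_callback = ["sentry", "lunary"]

# placeholder env vars for the logging tools (names assumed)
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
os.environ["HELICONE_API_KEY"] = "your-helicone-key"
os.environ["POSTHOG_API_KEY"] = "your-posthog-key"
os.environ["OPENAI_API_KEY"] = "sk-..."

# this call is logged to the success callbacks
response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
)
```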
@@ -174,6 +174,7 @@ upperbound_key_generate_params: Optional[Dict] = None
 default_user_params: Optional[Dict] = None
 default_team_settings: Optional[List] = None
 max_user_budget: Optional[float] = None
+max_end_user_budget: Optional[float] = None
 #### RELIABILITY ####
 request_timeout: Optional[float] = 6000
 num_retries: Optional[int] = None  # per model endpoint
@ -1,127 +0,0 @@
|
||||||
#### What this does ####
|
|
||||||
# On success + failure, log events to aispend.io
|
|
||||||
import datetime
|
|
||||||
import traceback
|
|
||||||
import dotenv
|
|
||||||
import os
|
|
||||||
import requests
|
|
||||||
|
|
||||||
dotenv.load_dotenv() # Loading env variables using dotenv
|
|
||||||
|
|
||||||
|
|
||||||
# convert to {completion: xx, tokens: xx}
|
|
||||||
def parse_usage(usage):
|
|
||||||
return {
|
|
||||||
"completion": usage["completion_tokens"] if "completion_tokens" in usage else 0,
|
|
||||||
"prompt": usage["prompt_tokens"] if "prompt_tokens" in usage else 0,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def parse_messages(input):
|
|
||||||
if input is None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
def clean_message(message):
|
|
||||||
# if is strin, return as is
|
|
||||||
if isinstance(message, str):
|
|
||||||
return message
|
|
||||||
|
|
||||||
if "message" in message:
|
|
||||||
return clean_message(message["message"])
|
|
||||||
text = message["content"]
|
|
||||||
if text == None:
|
|
||||||
text = message.get("function_call", None)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"role": message["role"],
|
|
||||||
"text": text,
|
|
||||||
}
|
|
||||||
|
|
||||||
if isinstance(input, list):
|
|
||||||
if len(input) == 1:
|
|
||||||
return clean_message(input[0])
|
|
||||||
else:
|
|
||||||
return [clean_message(msg) for msg in input]
|
|
||||||
else:
|
|
||||||
return clean_message(input)
|
|
||||||
|
|
||||||
|
|
||||||
class LLMonitorLogger:
|
|
||||||
# Class variables or attributes
|
|
||||||
def __init__(self):
|
|
||||||
# Instance variables
|
|
||||||
self.api_url = os.getenv("LLMONITOR_API_URL") or "https://app.llmonitor.com"
|
|
||||||
self.app_id = os.getenv("LLMONITOR_APP_ID")
|
|
||||||
|
|
||||||
def log_event(
|
|
||||||
self,
|
|
||||||
type,
|
|
||||||
event,
|
|
||||||
run_id,
|
|
||||||
model,
|
|
||||||
print_verbose,
|
|
||||||
input=None,
|
|
||||||
user_id=None,
|
|
||||||
response_obj=None,
|
|
||||||
start_time=datetime.datetime.now(),
|
|
||||||
end_time=datetime.datetime.now(),
|
|
||||||
error=None,
|
|
||||||
):
|
|
||||||
# Method definition
|
|
||||||
try:
|
|
||||||
print_verbose(f"LLMonitor Logging - Logging request for model {model}")
|
|
||||||
|
|
||||||
if response_obj:
|
|
||||||
usage = (
|
|
||||||
parse_usage(response_obj["usage"])
|
|
||||||
if "usage" in response_obj
|
|
||||||
else None
|
|
||||||
)
|
|
||||||
output = response_obj["choices"] if "choices" in response_obj else None
|
|
||||||
else:
|
|
||||||
usage = None
|
|
||||||
output = None
|
|
||||||
|
|
||||||
if error:
|
|
||||||
error_obj = {"stack": error}
|
|
||||||
|
|
||||||
else:
|
|
||||||
error_obj = None
|
|
||||||
|
|
||||||
data = [
|
|
||||||
{
|
|
||||||
"type": type,
|
|
||||||
"name": model,
|
|
||||||
"runId": run_id,
|
|
||||||
"app": self.app_id,
|
|
||||||
"event": "start",
|
|
||||||
"timestamp": start_time.isoformat(),
|
|
||||||
"userId": user_id,
|
|
||||||
"input": parse_messages(input),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": type,
|
|
||||||
"runId": run_id,
|
|
||||||
"app": self.app_id,
|
|
||||||
"event": event,
|
|
||||||
"error": error_obj,
|
|
||||||
"timestamp": end_time.isoformat(),
|
|
||||||
"userId": user_id,
|
|
||||||
"output": parse_messages(output),
|
|
||||||
"tokensUsage": usage,
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
print_verbose(f"LLMonitor Logging - final data object: {data}")
|
|
||||||
|
|
||||||
response = requests.post(
|
|
||||||
self.api_url + "/api/report",
|
|
||||||
headers={"Content-Type": "application/json"},
|
|
||||||
json={"events": data},
|
|
||||||
)
|
|
||||||
|
|
||||||
print_verbose(f"LLMonitor Logging - response: {response}")
|
|
||||||
except:
|
|
||||||
# traceback.print_exc()
|
|
||||||
print_verbose(f"LLMonitor Logging Error - {traceback.format_exc()}")
|
|
||||||
pass
|
|
litellm/integrations/lunary.py (new file, 157 lines)
|
@ -0,0 +1,157 @@
|
||||||
|
#### What this does ####
|
||||||
|
# On success + failure, log events to lunary.ai
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
import traceback
|
||||||
|
import dotenv
|
||||||
|
import importlib
|
||||||
|
from pkg_resources import parse_version
|
||||||
|
import sys
|
||||||
|
|
||||||
|
dotenv.load_dotenv()
|
||||||
|
|
||||||
|
# convert to {completion: xx, tokens: xx}
|
||||||
|
def parse_usage(usage):
|
||||||
|
return {
|
||||||
|
"completion": usage["completion_tokens"] if "completion_tokens" in usage else 0,
|
||||||
|
"prompt": usage["prompt_tokens"] if "prompt_tokens" in usage else 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
def parse_messages(input):
|
||||||
|
if input is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def clean_message(message):
|
||||||
|
# if it is a string, return as is
|
||||||
|
if isinstance(message, str):
|
||||||
|
return message
|
||||||
|
|
||||||
|
if "message" in message:
|
||||||
|
return clean_message(message["message"])
|
||||||
|
|
||||||
|
|
||||||
|
serialized = {
|
||||||
|
"role": message.get("role"),
|
||||||
|
"content": message.get("content"),
|
||||||
|
}
|
||||||
|
|
||||||
|
# Only add tool_calls and function_call to res if they are set
|
||||||
|
if message.get("tool_calls"):
|
||||||
|
serialized["tool_calls"] = message.get("tool_calls")
|
||||||
|
if message.get("function_call"):
|
||||||
|
serialized["function_call"] = message.get("function_call")
|
||||||
|
|
||||||
|
return serialized
|
||||||
|
|
||||||
|
if isinstance(input, list):
|
||||||
|
if len(input) == 1:
|
||||||
|
return clean_message(input[0])
|
||||||
|
else:
|
||||||
|
return [clean_message(msg) for msg in input]
|
||||||
|
else:
|
||||||
|
return clean_message(input)
|
||||||
|
|
||||||
|
|
||||||
|
class LunaryLogger:
|
||||||
|
# Class variables or attributes
|
||||||
|
def __init__(self):
|
||||||
|
try:
|
||||||
|
import lunary
|
||||||
|
version = importlib.metadata.version("lunary")
|
||||||
|
# if version < 0.1.43 then raise ImportError
|
||||||
|
if parse_version(version) < parse_version("0.1.43"):
|
||||||
|
print("Lunary version outdated. Required: > 0.1.43. Upgrade via 'pip install lunary --upgrade'")
|
||||||
|
raise ImportError
|
||||||
|
|
||||||
|
self.lunary_client = lunary
|
||||||
|
except ImportError:
|
||||||
|
print("Lunary not installed. Please install it using 'pip install lunary'")
|
||||||
|
raise ImportError
|
||||||
|
|
||||||
|
def log_event(
|
||||||
|
self,
|
||||||
|
kwargs,
|
||||||
|
type,
|
||||||
|
event,
|
||||||
|
run_id,
|
||||||
|
model,
|
||||||
|
print_verbose,
|
||||||
|
extra=None,
|
||||||
|
input=None,
|
||||||
|
user_id=None,
|
||||||
|
response_obj=None,
|
||||||
|
start_time=datetime.now(timezone.utc),
|
||||||
|
end_time=datetime.now(timezone.utc),
|
||||||
|
error=None,
|
||||||
|
):
|
||||||
|
# Method definition
|
||||||
|
try:
|
||||||
|
print_verbose(f"Lunary Logging - Logging request for model {model}")
|
||||||
|
|
||||||
|
litellm_params = kwargs.get("litellm_params", {})
|
||||||
|
metadata = (
|
||||||
|
litellm_params.get("metadata", {}) or {}
|
||||||
|
)
|
||||||
|
|
||||||
|
tags = litellm_params.pop("tags", None) or []
|
||||||
|
|
||||||
|
if extra:
|
||||||
|
extra.pop("extra_body", None)
|
||||||
|
extra.pop("user", None)
|
||||||
|
template_id = extra.pop("extra_headers", {}).get("Template-Id", None)
|
||||||
|
|
||||||
|
# keep only serializable types
|
||||||
|
for param, value in extra.items():
|
||||||
|
if not isinstance(value, (str, int, bool, float)):
|
||||||
|
try:
|
||||||
|
extra[param] = str(value)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if response_obj:
|
||||||
|
usage = (
|
||||||
|
parse_usage(response_obj["usage"])
|
||||||
|
if "usage" in response_obj
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
|
||||||
|
output = response_obj["choices"] if "choices" in response_obj else None
|
||||||
|
|
||||||
|
else:
|
||||||
|
usage = None
|
||||||
|
output = None
|
||||||
|
|
||||||
|
if error:
|
||||||
|
error_obj = {"stack": error}
|
||||||
|
else:
|
||||||
|
error_obj = None
|
||||||
|
|
||||||
|
self.lunary_client.track_event(
|
||||||
|
type,
|
||||||
|
"start",
|
||||||
|
run_id,
|
||||||
|
user_id=user_id,
|
||||||
|
name=model,
|
||||||
|
input=parse_messages(input),
|
||||||
|
timestamp=start_time.astimezone(timezone.utc).isoformat(),
|
||||||
|
template_id=template_id,
|
||||||
|
metadata=metadata,
|
||||||
|
runtime="litellm",
|
||||||
|
tags=tags,
|
||||||
|
extra=extra,
|
||||||
|
)
|
||||||
|
|
||||||
|
self.lunary_client.track_event(
|
||||||
|
type,
|
||||||
|
event,
|
||||||
|
run_id,
|
||||||
|
timestamp=end_time.astimezone(timezone.utc).isoformat(),
|
||||||
|
runtime="litellm",
|
||||||
|
error=error_obj,
|
||||||
|
output=parse_messages(output),
|
||||||
|
token_usage=usage
|
||||||
|
)
|
||||||
|
|
||||||
|
except:
|
||||||
|
# traceback.print_exc()
|
||||||
|
print_verbose(f"Lunary Logging Error - {traceback.format_exc()}")
|
||||||
|
pass
|
|
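To see the new integration's surface area in one place, here is a hedged sketch of driving `LunaryLogger.log_event` directly; in normal use litellm's callback machinery calls it, and every value below is illustrative.

```python
# Hypothetical direct use of the new LunaryLogger; requires `pip install lunary`
# and a LUNARY_PUBLIC_KEY in the environment. Values are illustrative.
import uuid
from datetime import datetime, timezone
from litellm.integrations.lunary import LunaryLogger

logger = LunaryLogger()

logger.log_event(
    kwargs={"litellm_params": {"metadata": {"team": "docs"}}},
    type="llm",
    event="end",
    run_id=str(uuid.uuid4()),
    model="gpt-3.5-turbo",
    print_verbose=print,
    extra={"temperature": 0.2},  # non-serializable extras get stringified by the logger
    input=[{"role": "user", "content": "Hi"}],
    user_id="demo-user",
    response_obj={
        "choices": [{"message": {"role": "assistant", "content": "Hello!"}}],
        "usage": {"prompt_tokens": 5, "completion_tokens": 2},
    },
    start_time=datetime.now(timezone.utc),
    end_time=datetime.now(timezone.utc),
)
```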
@ -3,7 +3,7 @@ import json
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
import requests, copy
|
import requests, copy
|
||||||
import time, uuid
|
import time, uuid
|
||||||
from typing import Callable, Optional
|
from typing import Callable, Optional, List
|
||||||
from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
|
from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
|
||||||
import litellm
|
import litellm
|
||||||
from .prompt_templates.factory import (
|
from .prompt_templates.factory import (
|
||||||
|
@ -118,6 +118,7 @@ def completion(
|
||||||
):
|
):
|
||||||
headers = validate_environment(api_key, headers)
|
headers = validate_environment(api_key, headers)
|
||||||
_is_function_call = False
|
_is_function_call = False
|
||||||
|
json_schemas: dict = {}
|
||||||
messages = copy.deepcopy(messages)
|
messages = copy.deepcopy(messages)
|
||||||
optional_params = copy.deepcopy(optional_params)
|
optional_params = copy.deepcopy(optional_params)
|
||||||
if model in custom_prompt_dict:
|
if model in custom_prompt_dict:
|
||||||
|
@ -161,6 +162,10 @@ def completion(
|
||||||
## Handle Tool Calling
|
## Handle Tool Calling
|
||||||
if "tools" in optional_params:
|
if "tools" in optional_params:
|
||||||
_is_function_call = True
|
_is_function_call = True
|
||||||
|
for tool in optional_params["tools"]:
|
||||||
|
json_schemas[tool["function"]["name"]] = tool["function"].get(
|
||||||
|
"parameters", None
|
||||||
|
)
|
||||||
tool_calling_system_prompt = construct_tool_use_system_prompt(
|
tool_calling_system_prompt = construct_tool_use_system_prompt(
|
||||||
tools=optional_params["tools"]
|
tools=optional_params["tools"]
|
||||||
)
|
)
|
||||||
|
@ -248,7 +253,12 @@ def completion(
|
||||||
0
|
0
|
||||||
].strip()
|
].strip()
|
||||||
function_arguments_str = f"<invoke>{function_arguments_str}</invoke>"
|
function_arguments_str = f"<invoke>{function_arguments_str}</invoke>"
|
||||||
function_arguments = parse_xml_params(function_arguments_str)
|
function_arguments = parse_xml_params(
|
||||||
|
function_arguments_str,
|
||||||
|
json_schema=json_schemas.get(
|
||||||
|
function_name, None
|
||||||
|
), # check if we have a json schema for this function name
|
||||||
|
)
|
||||||
_message = litellm.Message(
|
_message = litellm.Message(
|
||||||
tool_calls=[
|
tool_calls=[
|
||||||
{
|
{
|
||||||
|
@ -263,6 +273,9 @@ def completion(
|
||||||
content=None,
|
content=None,
|
||||||
)
|
)
|
||||||
model_response.choices[0].message = _message # type: ignore
|
model_response.choices[0].message = _message # type: ignore
|
||||||
|
model_response._hidden_params["original_response"] = (
|
||||||
|
text_content # allow user to access raw anthropic tool calling response
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
model_response.choices[0].message.content = text_content # type: ignore
|
model_response.choices[0].message.content = text_content # type: ignore
|
||||||
model_response.choices[0].finish_reason = map_finish_reason(
|
model_response.choices[0].finish_reason = map_finish_reason(
|
||||||
|
|
|
@ -691,6 +691,7 @@ def completion(
|
||||||
):
|
):
|
||||||
exception_mapping_worked = False
|
exception_mapping_worked = False
|
||||||
_is_function_call = False
|
_is_function_call = False
|
||||||
|
json_schemas: dict = {}
|
||||||
try:
|
try:
|
||||||
# pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
|
# pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
|
||||||
aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
|
aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
|
||||||
|
@ -757,6 +758,10 @@ def completion(
|
||||||
## Handle Tool Calling
|
## Handle Tool Calling
|
||||||
if "tools" in inference_params:
|
if "tools" in inference_params:
|
||||||
_is_function_call = True
|
_is_function_call = True
|
||||||
|
for tool in inference_params["tools"]:
|
||||||
|
json_schemas[tool["function"]["name"]] = tool["function"].get(
|
||||||
|
"parameters", None
|
||||||
|
)
|
||||||
tool_calling_system_prompt = construct_tool_use_system_prompt(
|
tool_calling_system_prompt = construct_tool_use_system_prompt(
|
||||||
tools=inference_params["tools"]
|
tools=inference_params["tools"]
|
||||||
)
|
)
|
||||||
|
@ -943,7 +948,12 @@ def completion(
|
||||||
function_arguments_str = (
|
function_arguments_str = (
|
||||||
f"<invoke>{function_arguments_str}</invoke>"
|
f"<invoke>{function_arguments_str}</invoke>"
|
||||||
)
|
)
|
||||||
function_arguments = parse_xml_params(function_arguments_str)
|
function_arguments = parse_xml_params(
|
||||||
|
function_arguments_str,
|
||||||
|
json_schema=json_schemas.get(
|
||||||
|
function_name, None
|
||||||
|
), # check if we have a json schema for this function name)
|
||||||
|
)
|
||||||
_message = litellm.Message(
|
_message = litellm.Message(
|
||||||
tool_calls=[
|
tool_calls=[
|
||||||
{
|
{
|
||||||
|
@ -958,6 +968,9 @@ def completion(
|
||||||
content=None,
|
content=None,
|
||||||
)
|
)
|
||||||
model_response.choices[0].message = _message # type: ignore
|
model_response.choices[0].message = _message # type: ignore
|
||||||
|
model_response._hidden_params["original_response"] = (
|
||||||
|
outputText # allow user to access raw anthropic tool calling response
|
||||||
|
)
|
||||||
if _is_function_call == True and stream is not None and stream == True:
|
if _is_function_call == True and stream is not None and stream == True:
|
||||||
print_verbose(
|
print_verbose(
|
||||||
f"INSIDE BEDROCK STREAMING TOOL CALLING CONDITION BLOCK"
|
f"INSIDE BEDROCK STREAMING TOOL CALLING CONDITION BLOCK"
|
||||||
|
|
|
@ -731,18 +731,53 @@ def contains_tag(tag: str, string: str) -> bool:
|
||||||
return bool(re.search(f"<{tag}>(.+?)</{tag}>", string, re.DOTALL))
|
return bool(re.search(f"<{tag}>(.+?)</{tag}>", string, re.DOTALL))
|
||||||
|
|
||||||
|
|
||||||
def parse_xml_params(xml_content):
|
def parse_xml_params(xml_content, json_schema: Optional[dict] = None):
|
||||||
|
"""
|
||||||
|
Compare the xml output to the json schema
|
||||||
|
|
||||||
|
check if a value is a list - if so, get it's child elements
|
||||||
|
"""
|
||||||
root = ET.fromstring(xml_content)
|
root = ET.fromstring(xml_content)
|
||||||
params = {}
|
params = {}
|
||||||
|
|
||||||
|
if json_schema is not None: # check if we have a json schema for this function call
|
||||||
|
# iterate over all properties in the schema
|
||||||
|
for prop in json_schema["properties"]:
|
||||||
|
# If property is an array, get the nested items
|
||||||
|
_element = root.find(f"parameters/{prop}")
|
||||||
|
if json_schema["properties"][prop]["type"] == "array":
|
||||||
|
items = []
|
||||||
|
if _element is not None:
|
||||||
|
for value in _element:
|
||||||
|
try:
|
||||||
|
if value.text is not None:
|
||||||
|
_value = json.loads(value.text)
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
_value = value.text
|
||||||
|
items.append(_value)
|
||||||
|
params[prop] = items
|
||||||
|
# If property is not an array, append the value directly
|
||||||
|
elif _element is not None and _element.text is not None:
|
||||||
|
try:
|
||||||
|
_value = json.loads(_element.text)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
_value = _element.text
|
||||||
|
params[prop] = _value
|
||||||
|
else:
|
||||||
for child in root.findall(".//parameters/*"):
|
for child in root.findall(".//parameters/*"):
|
||||||
|
if child is not None and child.text is not None:
|
||||||
try:
|
try:
|
||||||
# Attempt to decode the element's text as JSON
|
# Attempt to decode the element's text as JSON
|
||||||
params[child.tag] = json.loads(child.text)
|
params[child.tag] = json.loads(child.text) # type: ignore
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
# If JSON decoding fails, use the original text
|
# If JSON decoding fails, use the original text
|
||||||
params[child.tag] = child.text
|
params[child.tag] = child.text # type: ignore
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
###
|
###
|
||||||
|
|
||||||
|
|
||||||
|
|
|
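A small usage sketch of the updated parser: the XML mirrors the `<item>`-wrapped list output the Anthropic docs warn about, and the schema is a hypothetical tool definition whose `value` property is an array.

```python
# Hypothetical call to parse_xml_params with an array-typed property in the schema.
from litellm.llms.prompt_templates.factory import parse_xml_params

xml_content = (
    "<invoke>"
    "<tool_name>pick_fruit</tool_name>"
    "<parameters><value><item>apple</item><item>banana</item></value></parameters>"
    "</invoke>"
)
json_schema = {"properties": {"value": {"type": "array", "items": {"type": "string"}}}}

params = parse_xml_params(xml_content, json_schema=json_schema)
print(params)  # expected: {'value': ['apple', 'banana']}
```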
@ -2952,7 +2952,26 @@ async def atext_completion(*args, **kwargs):
|
||||||
model=model,
|
model=model,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
return response
|
transformed_logprobs = None
|
||||||
|
# only supported for TGI models
|
||||||
|
try:
|
||||||
|
raw_response = response._hidden_params.get("original_response", None)
|
||||||
|
transformed_logprobs = litellm.utils.transform_logprobs(raw_response)
|
||||||
|
except Exception as e:
|
||||||
|
print_verbose(f"LiteLLM non blocking exception: {e}")
|
||||||
|
text_completion_response = TextCompletionResponse()
|
||||||
|
text_completion_response["id"] = response.get("id", None)
|
||||||
|
text_completion_response["object"] = "text_completion"
|
||||||
|
text_completion_response["created"] = response.get("created", None)
|
||||||
|
text_completion_response["model"] = response.get("model", None)
|
||||||
|
text_choices = TextChoices()
|
||||||
|
text_choices["text"] = response["choices"][0]["message"]["content"]
|
||||||
|
text_choices["index"] = response["choices"][0]["index"]
|
||||||
|
text_choices["logprobs"] = transformed_logprobs
|
||||||
|
text_choices["finish_reason"] = response["choices"][0]["finish_reason"]
|
||||||
|
text_completion_response["choices"] = [text_choices]
|
||||||
|
text_completion_response["usage"] = response.get("usage", None)
|
||||||
|
return text_completion_response
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
custom_llm_provider = custom_llm_provider or "openai"
|
custom_llm_provider = custom_llm_provider or "openai"
|
||||||
raise exception_type(
|
raise exception_type(
|
||||||
|
@ -3165,6 +3184,7 @@ def text_completion(
|
||||||
transformed_logprobs = litellm.utils.transform_logprobs(raw_response)
|
transformed_logprobs = litellm.utils.transform_logprobs(raw_response)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print_verbose(f"LiteLLM non blocking exception: {e}")
|
print_verbose(f"LiteLLM non blocking exception: {e}")
|
||||||
|
|
||||||
text_completion_response["id"] = response.get("id", None)
|
text_completion_response["id"] = response.get("id", None)
|
||||||
text_completion_response["object"] = "text_completion"
|
text_completion_response["object"] = "text_completion"
|
||||||
text_completion_response["created"] = response.get("created", None)
|
text_completion_response["created"] = response.get("created", None)
|
||||||
|
@ -3176,6 +3196,7 @@ def text_completion(
|
||||||
text_choices["finish_reason"] = response["choices"][0]["finish_reason"]
|
text_choices["finish_reason"] = response["choices"][0]["finish_reason"]
|
||||||
text_completion_response["choices"] = [text_choices]
|
text_completion_response["choices"] = [text_choices]
|
||||||
text_completion_response["usage"] = response.get("usage", None)
|
text_completion_response["usage"] = response.get("usage", None)
|
||||||
|
|
||||||
return text_completion_response
|
return text_completion_response
|
||||||
|
|
||||||
|
|
||||||
|
|
|
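The new branch reshapes the chat-style response into the OpenAI text-completion format; roughly, a caller can rely on the mapping sketched below (the call itself is illustrative).

```python
# Illustrative async call; the field mapping in the comments follows the diff above.
import asyncio
from litellm import atext_completion

async def main():
    response = await atext_completion(
        model="gpt-3.5-turbo-instruct",
        prompt="Say this is a test",
        max_tokens=10,
    )
    # response["choices"][0]["text"]          <- chat choices[0].message.content
    # response["choices"][0]["finish_reason"] <- chat choices[0].finish_reason
    # response["id"], ["created"], ["model"], ["usage"] are copied from the chat response
    print(response["choices"][0]["text"])

asyncio.run(main())
```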
@@ -1503,7 +1503,7 @@
         "litellm_provider": "bedrock",
         "mode": "chat"
     },
-    "mistral.mixtral-8x7b-instruct": {
+    "mistral.mixtral-8x7b-instruct-v0:1": {
         "max_tokens": 8191,
         "max_input_tokens": 32000,
         "max_output_tokens": 8191,
@@ -1512,7 +1512,7 @@
         "litellm_provider": "bedrock",
         "mode": "chat"
     },
-    "bedrock/us-west-2/mistral.mixtral-8x7b-instruct": {
+    "bedrock/us-west-2/mistral.mixtral-8x7b-instruct-v0:1": {
         "max_tokens": 8191,
         "max_input_tokens": 32000,
         "max_output_tokens": 8191,
@@ -5,10 +5,15 @@ model_list:
       api_key: my-fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app/
 
+litellm_settings:
+  max_budget: 600020
+  budget_duration: 30d
+
 general_settings:
   master_key: sk-1234
-  proxy_batch_write_at: 5 # 👈 Frequency of batch writing logs to server (in seconds)
+  proxy_batch_write_at: 60 # 👈 Frequency of batch writing logs to server (in seconds)
   enable_jwt_auth: True
+  alerting: ["slack"]
   litellm_jwtauth:
     admin_jwt_scope: "litellm_proxy_admin"
     team_jwt_scope: "litellm_team"
@@ -18,6 +18,7 @@ from litellm.proxy._types import (
 from typing import Optional, Literal, Union
 from litellm.proxy.utils import PrismaClient
 from litellm.caching import DualCache
+import litellm
 
 all_routes = LiteLLMRoutes.openai_routes.value + LiteLLMRoutes.management_routes.value
 
@@ -26,6 +27,7 @@ def common_checks(
     request_body: dict,
     team_object: LiteLLM_TeamTable,
     end_user_object: Optional[LiteLLM_EndUserTable],
+    global_proxy_spend: Optional[float],
     general_settings: dict,
     route: str,
 ) -> bool:
@@ -37,6 +39,7 @@
     3. If team is in budget
     4. If end_user ('user' passed to /chat/completions, /embeddings endpoint) is in budget
     5. [OPTIONAL] If 'enforce_end_user' enabled - did developer pass in 'user' param for openai endpoints
+    6. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
     """
     _model = request_body.get("model", None)
     if team_object.blocked == True:
@@ -66,7 +69,7 @@
         end_user_budget = end_user_object.litellm_budget_table.max_budget
         if end_user_budget is not None and end_user_object.spend > end_user_budget:
             raise Exception(
-                f"End User={end_user_object.user_id} over budget. Spend={end_user_object.spend}, Budget={end_user_budget}"
+                f"ExceededBudget: End User={end_user_object.user_id} over budget. Spend={end_user_object.spend}, Budget={end_user_budget}"
             )
     # 5. [OPTIONAL] If 'enforce_user_param' enabled - did developer pass in 'user' param for openai endpoints
     if (
@@ -77,7 +80,12 @@
         raise Exception(
             f"'user' param not passed in. 'enforce_user_param'={general_settings['enforce_user_param']}"
         )
+    # 6. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
+    if litellm.max_budget > 0 and global_proxy_spend is not None:
+        if global_proxy_spend > litellm.max_budget:
+            raise Exception(
+                f"ExceededBudget: LiteLLM Proxy has exceeded its budget. Current spend: {global_proxy_spend}; Max Budget: {litellm.max_budget}"
+            )
     return True
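Taken on its own, the new check behaves like this small stand-in (a simplified copy of check 6, with hypothetical numbers):

```python
# Simplified stand-in for the new proxy-wide budget check; numbers are hypothetical.
import litellm

def check_global_proxy_budget(global_proxy_spend):
    if litellm.max_budget > 0 and global_proxy_spend is not None:
        if global_proxy_spend > litellm.max_budget:
            raise Exception(
                f"ExceededBudget: LiteLLM Proxy has exceeded its budget. "
                f"Current spend: {global_proxy_spend}; Max Budget: {litellm.max_budget}"
            )
    return True

litellm.max_budget = 10.0        # proxy-wide budget in USD
check_global_proxy_budget(9.5)   # passes
# check_global_proxy_budget(10.5)  # would raise ExceededBudget
```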
@ -114,6 +114,7 @@ class JWTHandler:
|
||||||
public_key: Optional[dict] = None
|
public_key: Optional[dict] = None
|
||||||
|
|
||||||
if len(keys) == 1:
|
if len(keys) == 1:
|
||||||
|
if kid is None or keys["kid"] == kid:
|
||||||
public_key = keys[0]
|
public_key = keys[0]
|
||||||
elif len(keys) > 1:
|
elif len(keys) > 1:
|
||||||
for key in keys:
|
for key in keys:
|
||||||
|
|
|
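For orientation, matching a JWT's `kid` against a JWKS key set generally looks like this sketch; it is a generic illustration, not the proxy's exact code.

```python
# Generic illustration of kid-based JWKS key selection.
from typing import Optional

def select_public_key(keys: list, kid: Optional[str]) -> Optional[dict]:
    if len(keys) == 1:
        # with a single key, accept it when no kid is supplied or when it matches
        if kid is None or keys[0].get("kid") == kid:
            return keys[0]
    for key in keys:
        if kid is not None and key.get("kid") == kid:
            return key
    return None

jwks = [{"kid": "abc123", "kty": "RSA", "n": "...", "e": "AQAB"}]
print(select_public_key(jwks, "abc123"))
```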
@ -437,12 +437,49 @@ async def user_api_key_auth(
|
||||||
key=end_user_id, value=end_user_object
|
key=end_user_id, value=end_user_object
|
||||||
)
|
)
|
||||||
|
|
||||||
|
global_proxy_spend = None
|
||||||
|
|
||||||
|
if litellm.max_budget > 0: # user set proxy max budget
|
||||||
|
# check cache
|
||||||
|
global_proxy_spend = await user_api_key_cache.async_get_cache(
|
||||||
|
key="{}:spend".format(litellm_proxy_admin_name)
|
||||||
|
)
|
||||||
|
if global_proxy_spend is None and prisma_client is not None:
|
||||||
|
# get from db
|
||||||
|
sql_query = """SELECT SUM(spend) as total_spend FROM "MonthlyGlobalSpend";"""
|
||||||
|
|
||||||
|
response = await prisma_client.db.query_raw(query=sql_query)
|
||||||
|
|
||||||
|
global_proxy_spend = response[0]["total_spend"]
|
||||||
|
|
||||||
|
await user_api_key_cache.async_set_cache(
|
||||||
|
key="{}:spend".format(litellm_proxy_admin_name),
|
||||||
|
value=global_proxy_spend,
|
||||||
|
ttl=60,
|
||||||
|
)
|
||||||
|
if global_proxy_spend is not None:
|
||||||
|
user_info = {
|
||||||
|
"user_id": litellm_proxy_admin_name,
|
||||||
|
"max_budget": litellm.max_budget,
|
||||||
|
"spend": global_proxy_spend,
|
||||||
|
"user_email": "",
|
||||||
|
}
|
||||||
|
asyncio.create_task(
|
||||||
|
proxy_logging_obj.budget_alerts(
|
||||||
|
user_max_budget=litellm.max_budget,
|
||||||
|
user_current_spend=global_proxy_spend,
|
||||||
|
type="user_and_proxy_budget",
|
||||||
|
user_info=user_info,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
# run through common checks
|
# run through common checks
|
||||||
_ = common_checks(
|
_ = common_checks(
|
||||||
request_body=request_data,
|
request_body=request_data,
|
||||||
team_object=team_object,
|
team_object=team_object,
|
||||||
end_user_object=end_user_object,
|
end_user_object=end_user_object,
|
||||||
general_settings=general_settings,
|
general_settings=general_settings,
|
||||||
|
global_proxy_spend=global_proxy_spend,
|
||||||
route=route,
|
route=route,
|
||||||
)
|
)
|
||||||
# save user object in cache
|
# save user object in cache
|
||||||
|
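Stripped of the surrounding auth flow, the spend lookup above is a cache-first pattern, roughly as sketched below; the helper name is invented, while the cache key format, SQL, and 60-second TTL mirror the diff.

```python
# Rough outline of the cache-first global-spend lookup; the helper name is invented.
async def get_global_proxy_spend(user_api_key_cache, prisma_client, litellm_proxy_admin_name):
    cache_key = "{}:spend".format(litellm_proxy_admin_name)

    spend = await user_api_key_cache.async_get_cache(key=cache_key)
    if spend is None and prisma_client is not None:
        # fall back to the aggregated spend view in the DB
        sql_query = """SELECT SUM(spend) as total_spend FROM "MonthlyGlobalSpend";"""
        rows = await prisma_client.db.query_raw(query=sql_query)
        spend = rows[0]["total_spend"]
        # cache briefly so every request does not hit the DB
        await user_api_key_cache.async_set_cache(key=cache_key, value=spend, ttl=60)
    return spend
```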
@ -656,17 +693,8 @@ async def user_api_key_auth(
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check 2. If user_id for this token is in budget
|
# Check 2. If user_id for this token is in budget
|
||||||
## Check 2.1 If global proxy is in budget
|
|
||||||
## Check 2.2 [OPTIONAL - checked only if litellm.max_user_budget is not None] If 'user' passed in /chat/completions is in budget
|
|
||||||
if valid_token.user_id is not None:
|
if valid_token.user_id is not None:
|
||||||
user_id_list = [valid_token.user_id, litellm_proxy_budget_name]
|
user_id_list = [valid_token.user_id]
|
||||||
if (
|
|
||||||
litellm.max_user_budget is not None
|
|
||||||
): # Check if 'user' passed in /chat/completions is in budget, only checked if litellm.max_user_budget is set
|
|
||||||
user_passed_to_chat_completions = request_data.get("user", None)
|
|
||||||
if user_passed_to_chat_completions is not None:
|
|
||||||
user_id_list.append(user_passed_to_chat_completions)
|
|
||||||
|
|
||||||
for id in user_id_list:
|
for id in user_id_list:
|
||||||
value = user_api_key_cache.get_cache(key=id)
|
value = user_api_key_cache.get_cache(key=id)
|
||||||
if value is not None:
|
if value is not None:
|
||||||
|
@ -675,13 +703,12 @@ async def user_api_key_auth(
|
||||||
user_id_information.append(value)
|
user_id_information.append(value)
|
||||||
if user_id_information is None or (
|
if user_id_information is None or (
|
||||||
isinstance(user_id_information, list)
|
isinstance(user_id_information, list)
|
||||||
and len(user_id_information) < 2
|
and len(user_id_information) < 1
|
||||||
):
|
):
|
||||||
if prisma_client is not None:
|
if prisma_client is not None:
|
||||||
user_id_information = await prisma_client.get_data(
|
user_id_information = await prisma_client.get_data(
|
||||||
user_id_list=[
|
user_id_list=[
|
||||||
valid_token.user_id,
|
valid_token.user_id,
|
||||||
litellm_proxy_budget_name,
|
|
||||||
],
|
],
|
||||||
table_name="user",
|
table_name="user",
|
||||||
query_type="find_all",
|
query_type="find_all",
|
||||||
|
@ -881,11 +908,54 @@ async def user_api_key_auth(
|
||||||
blocked=valid_token.team_blocked,
|
blocked=valid_token.team_blocked,
|
||||||
models=valid_token.team_models,
|
models=valid_token.team_models,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
_end_user_object = None
|
||||||
|
if "user" in request_data:
|
||||||
|
_id = "end_user_id:{}".format(request_data["user"])
|
||||||
|
_end_user_object = await user_api_key_cache.async_get_cache(key=_id)
|
||||||
|
if _end_user_object is not None:
|
||||||
|
_end_user_object = LiteLLM_EndUserTable(**_end_user_object)
|
||||||
|
|
||||||
|
global_proxy_spend = None
|
||||||
|
if litellm.max_budget > 0: # user set proxy max budget
|
||||||
|
# check cache
|
||||||
|
global_proxy_spend = await user_api_key_cache.async_get_cache(
|
||||||
|
key="{}:spend".format(litellm_proxy_admin_name)
|
||||||
|
)
|
||||||
|
if global_proxy_spend is None:
|
||||||
|
# get from db
|
||||||
|
sql_query = """SELECT SUM(spend) as total_spend FROM "MonthlyGlobalSpend";"""
|
||||||
|
|
||||||
|
response = await prisma_client.db.query_raw(query=sql_query)
|
||||||
|
|
||||||
|
global_proxy_spend = response[0]["total_spend"]
|
||||||
|
await user_api_key_cache.async_set_cache(
|
||||||
|
key="{}:spend".format(litellm_proxy_admin_name),
|
||||||
|
value=global_proxy_spend,
|
||||||
|
ttl=60,
|
||||||
|
)
|
||||||
|
|
||||||
|
if global_proxy_spend is not None:
|
||||||
|
user_info = {
|
||||||
|
"user_id": litellm_proxy_admin_name,
|
||||||
|
"max_budget": litellm.max_budget,
|
||||||
|
"spend": global_proxy_spend,
|
||||||
|
"user_email": "",
|
||||||
|
}
|
||||||
|
asyncio.create_task(
|
||||||
|
proxy_logging_obj.budget_alerts(
|
||||||
|
user_max_budget=litellm.max_budget,
|
||||||
|
user_current_spend=global_proxy_spend,
|
||||||
|
type="user_and_proxy_budget",
|
||||||
|
user_info=user_info,
|
||||||
|
)
|
||||||
|
)
|
||||||
_ = common_checks(
|
_ = common_checks(
|
||||||
request_body=request_data,
|
request_body=request_data,
|
||||||
team_object=_team_obj,
|
team_object=_team_obj,
|
||||||
end_user_object=None,
|
end_user_object=_end_user_object,
|
||||||
general_settings=general_settings,
|
general_settings=general_settings,
|
||||||
|
global_proxy_spend=global_proxy_spend,
|
||||||
route=route,
|
route=route,
|
||||||
)
|
)
|
||||||
# Token passed all checks
|
# Token passed all checks
|
||||||
|
@@ -1553,7 +1623,7 @@ async def update_cache(

    async def _update_user_cache():
        ## UPDATE CACHE FOR USER ID + GLOBAL PROXY
-       user_ids = [user_id, litellm_proxy_budget_name, end_user_id]
+       user_ids = [user_id]
        try:
            for _id in user_ids:
                # Fetch the existing cost for the given user
@@ -1594,14 +1664,26 @@ async def update_cache(
                user_api_key_cache.set_cache(
                    key=_id, value=existing_spend_obj.json()
                )
+           ## UPDATE GLOBAL PROXY ##
+           global_proxy_spend = await user_api_key_cache.async_get_cache(
+               key="{}:spend".format(litellm_proxy_admin_name)
+           )
+           if global_proxy_spend is None:
+               await user_api_key_cache.async_set_cache(
+                   key="{}:spend".format(litellm_proxy_admin_name), value=response_cost
+               )
+           elif response_cost is not None and global_proxy_spend is not None:
+               increment = global_proxy_spend + response_cost
+               await user_api_key_cache.async_set_cache(
+                   key="{}:spend".format(litellm_proxy_admin_name), value=increment
+               )
        except Exception as e:
            verbose_proxy_logger.debug(
                f"An error occurred updating user cache: {str(e)}\n\n{traceback.format_exc()}"
            )

    async def _update_end_user_cache():
-       ## UPDATE CACHE FOR USER ID + GLOBAL PROXY
-       _id = end_user_id
+       _id = "end_user_id:{}".format(end_user_id)
        try:
            # Fetch the existing cost for the given user
            existing_spend_obj = await user_api_key_cache.async_get_cache(key=_id)
@@ -1609,14 +1691,14 @@ async def update_cache(
                # if user does not exist in LiteLLM_UserTable, create a new user
                existing_spend = 0
                max_user_budget = None
-               if litellm.max_user_budget is not None:
-                   max_user_budget = litellm.max_user_budget
+               if litellm.max_end_user_budget is not None:
+                   max_end_user_budget = litellm.max_end_user_budget
                existing_spend_obj = LiteLLM_EndUserTable(
                    user_id=_id,
                    spend=0,
                    blocked=False,
                    litellm_budget_table=LiteLLM_BudgetTable(
-                       max_budget=max_user_budget
+                       max_budget=max_end_user_budget
                    ),
                )
                verbose_proxy_logger.debug(
@@ -2909,6 +2991,11 @@ def model_list(
    dependencies=[Depends(user_api_key_auth)],
    tags=["completions"],
)
+@router.post(
+    "/openai/deployments/{model:path}/completions",
+    dependencies=[Depends(user_api_key_auth)],
+    tags=["completions"],
+)
async def completion(
    request: Request,
    fastapi_response: Response,
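The extra decorator registers the same text-completions handler under the Azure-style `/openai/deployments/{model}/completions` path. A hedged usage sketch follows; the proxy address, key, and deployment/model name are illustrative assumptions, not values from this diff.

```python
import requests

PROXY_BASE = "http://0.0.0.0:4000"  # assumed local proxy address
API_KEY = "sk-1234"                 # assumed key issued by that proxy

# Hit the Azure-style deployments path added by the decorator above.
resp = requests.post(
    f"{PROXY_BASE}/openai/deployments/gpt-3.5-turbo-instruct/completions",
    headers={"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"},
    json={
        "model": "gpt-3.5-turbo-instruct",  # hypothetical model configured on the proxy
        "prompt": "Say this is a test",
        "max_tokens": 7,
    },
)
print(resp.status_code, resp.json())
```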
@@ -4049,7 +4136,6 @@ async def generate_key_fn(
    )
    _budget_id = getattr(_budget, "budget_id", None)
    data_json = data.json()  # type: ignore
-
    # if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users
    if "max_budget" in data_json:
        data_json["key_max_budget"] = data_json.pop("max_budget", None)
@@ -4108,6 +4194,13 @@ async def update_key_fn(request: Request, data: UpdateKeyRequest):
            0,
        ):  # models default to [], spend defaults to 0, we should not reset these values
            non_default_values[k] = v

+   if "duration" in non_default_values:
+       duration = non_default_values.pop("duration")
+       duration_s = _duration_in_seconds(duration=duration)
+       expires = datetime.utcnow() + timedelta(seconds=duration_s)
+       non_default_values["expires"] = expires
+
    response = await prisma_client.update_data(
        token=key, data={**non_default_values, "token": key}
    )
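With this hunk, the key-update endpoint accepts a `duration` field and converts it into a fresh `expires` timestamp before persisting. A hedged sketch of calling it follows; the proxy address and master key are placeholders, and the `/key/update` path and `"duration": "120s"` value mirror what the integration test later in this diff sends.

```python
import requests

PROXY_BASE = "http://0.0.0.0:4000"  # assumed local proxy
MASTER_KEY = "sk-1234"              # assumed admin/master key

payload = {
    "key": "sk-existing-key",       # hypothetical key to update
    "models": ["gpt-4"],
    "duration": "120s",             # new: pushes the key's expiry 120 seconds out
}
resp = requests.post(
    f"{PROXY_BASE}/key/update",
    headers={"Authorization": f"Bearer {MASTER_KEY}", "Content-Type": "application/json"},
    json=payload,
)
print(resp.status_code, resp.json())
```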
@@ -6051,7 +6144,7 @@ async def team_member_delete(

    -D '{
        "team_id": "45e3e396-ee08-4a61-a88e-16b3ce7e0849",
-       "member": {"role": "user", "user_id": "krrish247652@berri.ai"}
+       "user_id": "krrish247652@berri.ai"
    }'
    ```
    """
@@ -1941,9 +1941,9 @@ async def update_spend(
        end_user_id,
        response_cost,
    ) in prisma_client.end_user_list_transactons.items():
-       max_user_budget = None
-       if litellm.max_user_budget is not None:
-           max_user_budget = litellm.max_user_budget
+       max_end_user_budget = None
+       if litellm.max_end_user_budget is not None:
+           max_end_user_budget = litellm.max_end_user_budget
        new_user_obj = LiteLLM_EndUserTable(
            user_id=end_user_id, spend=response_cost, blocked=False
        )
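End-user spend limits are now read from `litellm.max_end_user_budget` rather than the per-user `litellm.max_user_budget`, and the auth path looks the caller up by the `user` field of the request body. A hedged sketch of how a deployment might exercise this; the budget value, proxy address, key, and end-user id are illustrative only, and the setting only takes effect in the process running the proxy.

```python
import requests
import litellm

# Proxy side (e.g. in a startup hook): cap what any single end user may spend.
# This mirrors the tests in this diff, which set the attribute via setattr(litellm, "max_end_user_budget", ...).
litellm.max_end_user_budget = 0.0001  # USD, illustrative

# Client side: include a "user" field so the proxy tracks spend for that end user.
resp = requests.post(
    "http://0.0.0.0:4000/chat/completions",          # assumed local proxy
    headers={"Authorization": "Bearer sk-1234"},      # assumed key
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "hi"}],
        "user": "end-user-123",                       # hypothetical end-user id
    },
)
print(resp.status_code)
```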
@@ -195,6 +195,48 @@ def test_completion_claude_3_function_call():
        pytest.fail(f"Error occurred: {e}")


+def test_parse_xml_params():
+    from litellm.llms.prompt_templates.factory import parse_xml_params
+
+    ## SCENARIO 1 ## - W/ ARRAY
+    xml_content = """<invoke><tool_name>return_list_of_str</tool_name>\n<parameters>\n<value>\n<item>apple</item>\n<item>banana</item>\n<item>orange</item>\n</value>\n</parameters></invoke>"""
+    json_schema = {
+        "properties": {
+            "value": {
+                "items": {"type": "string"},
+                "title": "Value",
+                "type": "array",
+            }
+        },
+        "required": ["value"],
+        "type": "object",
+    }
+    response = parse_xml_params(xml_content=xml_content, json_schema=json_schema)
+
+    print(f"response: {response}")
+    assert response["value"] == ["apple", "banana", "orange"]
+
+    ## SCENARIO 2 ## - W/OUT ARRAY
+    xml_content = """<invoke><tool_name>get_current_weather</tool_name>\n<parameters>\n<location>Boston, MA</location>\n<unit>fahrenheit</unit>\n</parameters></invoke>"""
+    json_schema = {
+        "type": "object",
+        "properties": {
+            "location": {
+                "type": "string",
+                "description": "The city and state, e.g. San Francisco, CA",
+            },
+            "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+        },
+        "required": ["location"],
+    }
+
+    response = parse_xml_params(xml_content=xml_content, json_schema=json_schema)
+
+    print(f"response: {response}")
+    assert response["location"] == "Boston, MA"
+    assert response["unit"] == "fahrenheit"
+
+
 def test_completion_claude_3_multi_turn_conversations():
     litellm.set_verbose = True
     litellm.modify_params = True
@@ -324,7 +324,7 @@ def test_call_with_end_user_over_budget(prisma_client):

    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
-   setattr(litellm, "max_user_budget", 0.00001)
+   setattr(litellm, "max_end_user_budget", 0.00001)
    try:

        async def test():
@@ -378,7 +378,9 @@ def test_call_with_end_user_over_budget(prisma_client):
                        "user_api_key_user_id": user,
                    },
                    "proxy_server_request": {
+                       "body": {
                            "user": user,
+                       }
                    },
                },
                "response_cost": 10,
@@ -407,18 +409,20 @@ def test_call_with_proxy_over_budget(prisma_client):
    litellm_proxy_budget_name = f"litellm-proxy-budget-{time.time()}"
    setattr(
        litellm.proxy.proxy_server,
-       "litellm_proxy_budget_name",
+       "litellm_proxy_admin_name",
        litellm_proxy_budget_name,
    )
+   setattr(litellm, "max_budget", 0.00001)
+   from litellm.proxy.proxy_server import user_api_key_cache
+
+   user_api_key_cache.set_cache(
+       key="{}:spend".format(litellm_proxy_budget_name), value=0
+   )
+   setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
    try:

        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
-           ## CREATE PROXY + USER BUDGET ##
-           request = NewUserRequest(
-               max_budget=0.00001, user_id=litellm_proxy_budget_name
-           )
-           await new_user(request)
            request = NewUserRequest()
            key = await new_user(request)
            print(key)
@@ -470,6 +474,7 @@ def test_call_with_proxy_over_budget(prisma_client):
                start_time=datetime.now(),
                end_time=datetime.now(),
            )
+
            await asyncio.sleep(5)
            # use generated key to auth in
            result = await user_api_key_auth(request=request, api_key=bearer_token)
@@ -571,9 +576,17 @@ def test_call_with_proxy_over_budget_stream(prisma_client):
    litellm_proxy_budget_name = f"litellm-proxy-budget-{time.time()}"
    setattr(
        litellm.proxy.proxy_server,
-       "litellm_proxy_budget_name",
+       "litellm_proxy_admin_name",
        litellm_proxy_budget_name,
    )
+   setattr(litellm, "max_budget", 0.00001)
+   from litellm.proxy.proxy_server import user_api_key_cache
+
+   user_api_key_cache.set_cache(
+       key="{}:spend".format(litellm_proxy_budget_name), value=0
+   )
+   setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
+
    from litellm._logging import verbose_proxy_logger
    import logging

@@ -584,10 +597,10 @@ def test_call_with_proxy_over_budget_stream(prisma_client):
        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()
            ## CREATE PROXY + USER BUDGET ##
-           request = NewUserRequest(
-               max_budget=0.00001, user_id=litellm_proxy_budget_name
-           )
-           await new_user(request)
+           # request = NewUserRequest(
+           #     max_budget=0.00001, user_id=litellm_proxy_budget_name
+           # )
+           # await new_user(request)
            request = NewUserRequest()
            key = await new_user(request)
            print(key)
@@ -1,76 +0,0 @@
-# #### What this tests ####
-# # This tests if logging to the llmonitor integration actually works
-# # Adds the parent directory to the system path
-# import sys
-# import os
-
-# sys.path.insert(0, os.path.abspath("../.."))
-
-# from litellm import completion, embedding
-# import litellm
-
-# litellm.success_callback = ["llmonitor"]
-# litellm.failure_callback = ["llmonitor"]
-
-# litellm.set_verbose = True
-
-# def test_chat_openai():
-#     ...
-
-# def test_embedding_openai():
-#     ...
-
-# test_chat_openai()
-# # test_embedding_openai()
-
-# def test_llmonitor_logging_function_calling():
-#     ...
-
-# # test_llmonitor_logging_function_calling()

(The deleted file was entirely commented-out llmonitor test code; the bodies of the three commented-out tests are elided here for brevity.)
litellm/tests/test_lunary.py (new file, 85 lines)
@@ -0,0 +1,85 @@
+import sys
+import os
+import io
+
+sys.path.insert(0, os.path.abspath("../.."))
+
+from litellm import completion
+import litellm
+
+litellm.failure_callback = ["lunary"]
+litellm.success_callback = ["lunary"]
+litellm.set_verbose = True
+
+
+def test_lunary_logging():
+    try:
+        response = completion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "what llm are u"}],
+            max_tokens=10,
+            temperature=0.2,
+            user="test-user",
+        )
+        print(response)
+    except Exception as e:
+        print(e)
+
+
+# test_lunary_logging()
+
+
+def test_lunary_template():
+    import lunary
+
+    try:
+        template = lunary.render_template("test-template", {"question": "Hello!"})
+        response = completion(**template)
+        print(response)
+    except Exception as e:
+        print(e)
+
+
+# test_lunary_template()
+
+
+def test_lunary_logging_with_metadata():
+    try:
+        response = completion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "what llm are u"}],
+            max_tokens=10,
+            temperature=0.2,
+            metadata={
+                "run_name": "litellmRUN",
+                "project_name": "litellm-completion",
+            },
+        )
+        print(response)
+    except Exception as e:
+        print(e)
+
+
+# test_lunary_logging_with_metadata()
+
+
+def test_lunary_logging_with_streaming_and_metadata():
+    try:
+        response = completion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "what llm are u"}],
+            max_tokens=10,
+            temperature=0.2,
+            metadata={
+                "run_name": "litellmRUN",
+                "project_name": "litellm-completion",
+            },
+            stream=True,
+        )
+        for chunk in response:
+            continue
+    except Exception as e:
+        print(e)
+
+
+# test_lunary_logging_with_streaming_and_metadata()
@@ -59,7 +59,7 @@ from .integrations.helicone import HeliconeLogger
from .integrations.aispend import AISpendLogger
from .integrations.berrispend import BerriSpendLogger
from .integrations.supabase import Supabase
-from .integrations.llmonitor import LLMonitorLogger
+from .integrations.lunary import LunaryLogger
from .integrations.prompt_layer import PromptLayerLogger
from .integrations.langsmith import LangsmithLogger
from .integrations.weights_biases import WeightsBiasesLogger
@@ -129,7 +129,7 @@ dynamoLogger = None
s3Logger = None
genericAPILogger = None
clickHouseLogger = None
-llmonitorLogger = None
+lunaryLogger = None
aispendLogger = None
berrispendLogger = None
supabaseClient = None
@@ -882,7 +882,7 @@ class CallTypes(Enum):

# Logging function -> log the exact model details + what's being sent | Non-BlockingP
class Logging:
-    global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, capture_exception, add_breadcrumb, llmonitorLogger
+    global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, capture_exception, add_breadcrumb, lunaryLogger

    def __init__(
        self,
@@ -1429,27 +1429,37 @@ class Logging:
                            end_time=end_time,
                            print_verbose=print_verbose,
                        )
-                   if callback == "llmonitor":
-                       print_verbose("reaches llmonitor for logging!")
+                   if callback == "lunary":
+                       print_verbose("reaches lunary for logging!")
                        model = self.model
+                       kwargs = self.model_call_details
+
-                       input = self.model_call_details.get(
-                           "messages", self.model_call_details.get("input", None)
-                       )
+                       input = kwargs.get(
+                           "messages", kwargs.get("input", None)
+                       )
-                       # if contains input, it's 'embedding', otherwise 'llm'
+
                        type = (
                            "embed"
                            if self.call_type == CallTypes.embedding.value
                            else "llm"
                        )

-                       llmonitorLogger.log_event(
+                       # this only logs streaming once, complete_streaming_response exists i.e when stream ends
+                       if self.stream:
+                           if "complete_streaming_response" not in kwargs:
+                               break
+                           else:
+                               result = kwargs["complete_streaming_response"]
+
+                       lunaryLogger.log_event(
                            type=type,
+                           kwargs=kwargs,
                            event="end",
                            model=model,
                            input=input,
-                           user_id=self.model_call_details.get("user", "default"),
+                           user_id=kwargs.get("user", None),
+                           #user_props=self.model_call_details.get("user_props", None),
+                           extra=kwargs.get("optional_params", {}),
                            response_obj=result,
                            start_time=start_time,
                            end_time=end_time,
@@ -2041,8 +2051,8 @@ class Logging:
                        call_type=self.call_type,
                        stream=self.stream,
                    )
-               elif callback == "llmonitor":
-                   print_verbose("reaches llmonitor for logging error!")
+               elif callback == "lunary":
+                   print_verbose("reaches lunary for logging error!")

                    model = self.model

@@ -2054,7 +2064,9 @@ class Logging:
                        else "llm"
                    )

-                   llmonitorLogger.log_event(
+
+                   lunaryLogger.log_event(
                        type=_type,
                        event="error",
                        user_id=self.model_call_details.get("user", "default"),
@@ -6166,7 +6178,9 @@ def validate_environment(model: Optional[str] = None) -> dict:


def set_callbacks(callback_list, function_id=None):
-   global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, llmonitorLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger
+
+   global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, dynamoLogger, s3Logger, dataDogLogger, prometheusLogger
+
    try:
        for callback in callback_list:
            print_verbose(f"callback: {callback}")
@@ -6226,8 +6240,8 @@ def set_callbacks(callback_list, function_id=None):
                print_verbose("Initialized Athina Logger")
            elif callback == "helicone":
                heliconeLogger = HeliconeLogger()
-           elif callback == "llmonitor":
-               llmonitorLogger = LLMonitorLogger()
+           elif callback == "lunary":
+               lunaryLogger = LunaryLogger()
            elif callback == "promptlayer":
                promptLayerLogger = PromptLayerLogger()
            elif callback == "langfuse":
@@ -6270,7 +6284,7 @@ def set_callbacks(callback_list, function_id=None):

# NOTE: DEPRECATING this in favor of using failure_handler() in Logging:
def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs):
-   global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, llmonitorLogger
+   global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger
    try:
        # print_verbose(f"handle_failure args: {args}")
        # print_verbose(f"handle_failure kwargs: {kwargs}")
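In short, the `llmonitor` callback name and logger are replaced by `lunary` throughout, and the success handler now forwards the request's `user` and `optional_params` to it. A hedged migration sketch, separate from the new test file above; the model, message, and user id are illustrative, and valid OpenAI and Lunary credentials are assumed to be present in the environment.

```python
import litellm
from litellm import completion

# Integrations that previously set ["llmonitor"] should now use the "lunary" callback name.
litellm.success_callback = ["lunary"]
litellm.failure_callback = ["lunary"]

response = completion(
    model="gpt-3.5-turbo",                            # illustrative model
    messages=[{"role": "user", "content": "hello"}],
    user="end-user-123",                              # forwarded to lunary as user_id
)
print(response)
```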
@@ -1503,7 +1503,7 @@
        "litellm_provider": "bedrock",
        "mode": "chat"
    },
-   "mistral.mixtral-8x7b-instruct": {
+   "mistral.mixtral-8x7b-instruct-v0:1": {
        "max_tokens": 8191,
        "max_input_tokens": 32000,
        "max_output_tokens": 8191,
@@ -1512,7 +1512,7 @@
        "litellm_provider": "bedrock",
        "mode": "chat"
    },
-   "bedrock/us-west-2/mistral.mixtral-8x7b-instruct": {
+   "bedrock/us-west-2/mistral.mixtral-8x7b-instruct-v0:1": {
        "max_tokens": 8191,
        "max_input_tokens": 32000,
        "max_output_tokens": 8191,

@@ -45,8 +45,8 @@ model_list:

litellm_settings:
  drop_params: True
-  max_budget: 100
-  budget_duration: 30d
+  # max_budget: 100
+  # budget_duration: 30d
  num_retries: 5
  request_timeout: 600
  telemetry: False
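The pricing-map entries now carry Bedrock's full versioned model id. A hedged call sketch; it assumes AWS credentials are configured in the environment and that the versioned id below is available in your Bedrock region.

```python
from litellm import completion

# The corrected, versioned Bedrock model id from the pricing map above.
response = completion(
    model="bedrock/mistral.mixtral-8x7b-instruct-v0:1",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(response["choices"][0]["message"]["content"])
```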
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
-version = "1.34.14"
+version = "1.34.17"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
-version = "1.34.14"
+version = "1.34.17"
version_files = [
    "pyproject.toml:^version"
]
@@ -65,7 +65,7 @@ async def update_key(session, get_key):
        "Authorization": f"Bearer sk-1234",
        "Content-Type": "application/json",
    }
-   data = {"key": get_key, "models": ["gpt-4"]}
+   data = {"key": get_key, "models": ["gpt-4"], "duration": "120s"}

    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
@@ -2,7 +2,8 @@
## Tests /chat/completions by generating a key and then making a chat completions request
import pytest
import asyncio
-import aiohttp
+import aiohttp, openai
+from openai import OpenAI


async def generate_key(session):
@@ -114,14 +115,14 @@ async def completion(session, key):

    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
-       response_text = await response.text()
-
-       print(response_text)
-       print()

        if status != 200:
            raise Exception(f"Request did not return a 200 status code: {status}")

+       response = await response.json()
+
+       return response
+

@pytest.mark.asyncio
async def test_completion():
@@ -137,7 +138,17 @@ async def test_completion():
        await completion(session=session, key=key)
        key_gen = await new_user(session=session)
        key_2 = key_gen["key"]
-       await completion(session=session, key=key_2)
+       # response = await completion(session=session, key=key_2)
+
+       ## validate openai format ##
+       client = OpenAI(api_key=key_2, base_url="http://0.0.0.0:4000")
+
+       client.completions.create(
+           model="gpt-4",
+           prompt="Say this is a test",
+           max_tokens=7,
+           temperature=0,
+       )


async def embeddings(session, key):
[Admin UI build output: several file diffs here are regenerated Next.js artifacts — two diffs are suppressed because their lines are too long, new one-line `self.__BUILD_MANIFEST` / `self.__SSG_MANIFEST` bundles are added, and the previous prerendered "🚅 LiteLLM" admin-UI HTML page and its RSC payload are removed. The minified contents are omitted here.]
@@ -91,18 +91,19 @@ const CreateKey: React.FC<CreateKeyProps> = ({
    }
  };

-  const sendSlackAlert = async () => {
-    try {
-      console.log("Sending Slack alert...");
-      const response = await slackBudgetAlertsHealthCheck(accessToken);
-      console.log("slackBudgetAlertsHealthCheck Response:", response);
-      console.log("Testing Slack alert successful");
-    } catch (error) {
-      console.error("Error sending Slack alert:", error);
+  const handleModelSelection = (selectedModels: string[]) => {
+    if (selectedModels.includes("all_models")) {
+      // Select all models except "All Models"
+      const allModelsExceptAll = team ? team.models : userModels;
+      form.setFieldsValue({
+        models: allModelsExceptAll
+      });
    }
  };



  const handleCopy = () => {
    message.success('API Key copied to clipboard');
  };
@@ -153,8 +154,11 @@ const CreateKey: React.FC<CreateKeyProps> = ({
              mode="multiple"
              placeholder="Select models"
              style={{ width: "100%" }}
+             onChange={(selectedModels) => handleModelSelection(selectedModels)}
            >
+             <Option key="all_models" value="all_models">
+               All Models
+             </Option>
              {team && team.models ? (
                team.models.map((model: string) => (
                  <Option key={model} value={model}>
@@ -216,6 +216,17 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
    setIsDeleteModalOpen(true);
  };

+  const handleModelSelection = (selectedModels: string[]) => {
+    if (selectedModels.includes("all_models")) {
+      // Select all models except "All Models"
+      const allModelsExceptAll = userModels.filter(model => model !== "all");
+      form.setFieldsValue({
+        models: allModelsExceptAll
+      });
+    }
+  };
+
+
  const confirmDelete = async () => {
    if (teamToDelete == null || teams == null || accessToken == null) {
      return;
@@ -473,7 +484,11 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
                mode="multiple"
                placeholder="Select models"
                style={{ width: "100%" }}
+               onChange={(selectedModels) => handleModelSelection(selectedModels)}
              >
+               <Select2.Option key="all_models" value="all_models">
+                 All Models
+               </Select2.Option>
                {userModels.map((model) => (
                  <Select2.Option key={model} value={model}>
                    {model}
@@ -481,6 +496,7 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
                ))}
              </Select2>
            </Form.Item>
+
            <Form.Item label="Max Budget (USD)" name="max_budget">
              <InputNumber step={0.01} precision={2} width={200} />
            </Form.Item>