forked from phoenix/litellm-mirror

feat: LLMonitor is now Lunary

parent f90533a3b3
commit a9648613dc

15 changed files with 633 additions and 548 deletions
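In user code, the rename amounts to swapping the callback name and the environment variable; a minimal before/after sketch (key values are placeholders):

```python
import os
import litellm

# before: LLMonitor
# os.environ["LLMONITOR_APP_ID"] = "your-llmonitor-app-id"
# litellm.success_callback = ["llmonitor"]

# after: Lunary
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
litellm.success_callback = ["lunary"]
litellm.failure_callback = ["lunary"]
```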
README.md (33 changes)
@@ -25,27 +25,26 @@
</h4>

LiteLLM manages:

- Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints
- [Consistent output](https://docs.litellm.ai/docs/completion/output), text responses will always be available at `['choices'][0]['message']['content']`
- Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)
- Set Budgets & Rate limits per project, api key, model [OpenAI Proxy Server](https://docs.litellm.ai/docs/simple_proxy)

[**Jump to OpenAI Proxy Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
[**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-provider-docs)

Support for more providers. Missing a provider or LLM Platform, raise a [feature request](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+).

# Usage ([**Docs**](https://docs.litellm.ai/docs/))

> [!IMPORTANT]
> LiteLLM v1.0.0 now requires `openai>=1.0.0`. Migration guide [here](https://docs.litellm.ai/docs/migration)

<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_Getting_Started.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

```shell
pip install litellm
```
@@ -87,8 +86,10 @@ print(response)
```

## Streaming ([Docs](https://docs.litellm.ai/docs/completion/stream))

LiteLLM supports streaming the model response back; pass `stream=True` to get a streaming iterator in the response.
Streaming is supported for all models (Bedrock, Huggingface, TogetherAI, Azure, OpenAI, etc.)

```python
from litellm import completion
response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
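# consume the streaming iterator - each part is an OpenAI-style chunk
# (a minimal sketch; delta.content can be None on the final chunk)
for part in response:
    print(part.choices[0].delta.content or "", end="")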
@@ -102,20 +103,22 @@ for part in response:
```

## Logging Observability ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
-LiteLLM exposes pre defined callbacks to send data to Langfuse, DynamoDB, s3 Buckets, LLMonitor, Helicone, Promptlayer, Traceloop, Athina, Slack
+LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, DynamoDB, s3 Buckets, Helicone, Promptlayer, Traceloop, Athina, Slack

```python
from litellm import completion

## set env variables for logging tools
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
-os.environ["LLMONITOR_APP_ID"] = "your-llmonitor-app-id"
+os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
os.environ["ATHINA_API_KEY"] = "your-athina-api-key"

os.environ["OPENAI_API_KEY"]

# set callbacks
-litellm.success_callback = ["langfuse", "llmonitor", "athina"] # log input/output to langfuse, llmonitor, supabase, athina etc
+litellm.success_callback = ["langfuse", "lunary", "athina"] # log input/output to langfuse, lunary, supabase, athina etc

#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
@@ -126,6 +129,7 @@ response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content
Set Budgets & Rate limits across multiple projects

The proxy provides:

1. [Hooks for auth](https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth)
2. [Hooks for logging](https://docs.litellm.ai/docs/proxy/logging#step-1---create-your-custom-litellm-callback-class)
3. [Cost tracking](https://docs.litellm.ai/docs/proxy/virtual_keys#tracking-spend)

@@ -140,6 +144,7 @@ pip install 'litellm[proxy]'
```

### Step 1: Start litellm proxy

```shell
$ litellm --model huggingface/bigcode/starcoder

@@ -147,6 +152,7 @@ $ litellm --model huggingface/bigcode/starcoder
```

### Step 2: Make ChatCompletions Request to Proxy

```python
import openai # openai v1.0.0+
client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:8000") # set proxy to base_url
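# the request itself goes through the standard OpenAI client pointed at the proxy
# (a minimal sketch; the model name is whatever the proxy was started with)
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "this is a test request, write a short poem"}],
)
print(response)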
@@ -162,6 +168,7 @@ print(response)
```

## Proxy Key Management ([Docs](https://docs.litellm.ai/docs/proxy/virtual_keys))

UI on `/ui` on your proxy server
![ui_3](https://github.com/BerriAI/litellm/assets/29436595/47c97d5e-b9be-4839-b28c-43d7f4f10033)

@@ -169,6 +176,7 @@ Set budgets and rate limits across multiple projects
`POST /key/generate`

### Request

```shell
curl 'http://0.0.0.0:8000/key/generate' \
--header 'Authorization: Bearer sk-1234' \

@@ -177,6 +185,7 @@ curl 'http://0.0.0.0:8000/key/generate' \
```

### Expected Response

```shell
{
    "key": "sk-kdEXbIqZRwEeEiHwdg7sFA", # Bearer token
@@ -185,8 +194,9 @@ curl 'http://0.0.0.0:8000/key/generate' \
```

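The generated key is then used as the `api_key` (Bearer token) for requests to the proxy; a minimal sketch with the OpenAI client, reusing the example key from the response above:

```python
import openai

client = openai.OpenAI(api_key="sk-kdEXbIqZRwEeEiHwdg7sFA", base_url="http://0.0.0.0:8000")
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi 👋"}],
)
print(response)
```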
## Supported Providers ([Docs](https://docs.litellm.ai/docs/providers))

| Provider | [Completion](https://docs.litellm.ai/docs/#basic-usage) | [Streaming](https://docs.litellm.ai/docs/completion/stream#streaming-responses) | [Async Completion](https://docs.litellm.ai/docs/completion/stream#async-completion) | [Async Streaming](https://docs.litellm.ai/docs/completion/stream#async-streaming) | [Async Embedding](https://docs.litellm.ai/docs/embedding/supported_embedding) | [Async Image Generation](https://docs.litellm.ai/docs/image_generation) |
| ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- |
| [openai](https://docs.litellm.ai/docs/providers/openai) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [azure](https://docs.litellm.ai/docs/providers/azure) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| [aws - sagemaker](https://docs.litellm.ai/docs/providers/aws_sagemaker) | ✅ | ✅ | ✅ | ✅ | ✅ |

@@ -216,25 +226,28 @@ curl 'http://0.0.0.0:8000/key/generate' \
| [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ |
| [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ |

[**Read the Docs**](https://docs.litellm.ai/docs/)

## Contributing

To contribute: Clone the repo locally -> Make a change -> Submit a PR with the change.

Here's how to modify the repo locally:
Step 1: Clone the repo

```
git clone https://github.com/BerriAI/litellm.git
```

Step 2: Navigate into the project, and install dependencies:

```
cd litellm
poetry install
```

Step 3: Test your change:

```
cd litellm/tests # pwd: Documents/litellm/litellm/tests
poetry run flake8
@@ -242,16 +255,19 @@ poetry run pytest .
```

Step 4: Submit a PR with your changes! 🚀

- push your fork to your GitHub repo
- submit a PR from there

# Support / talk with founders

- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

# Why did we build this

- **Need for simplicity**: Our code started to get extremely complicated managing & translating calls between Azure, OpenAI and Cohere.

# Contributors

@@ -268,4 +284,3 @@ Step 4: Submit a PR with your changes! 🚀
<a href="https://github.com/BerriAI/litellm/graphs/contributors">
  <img src="https://contrib.rocks/image?repo=BerriAI/litellm" />
</a>
@@ -33,7 +33,7 @@
- Call all models using the OpenAI format - `completion(model, messages)`
- Text responses will always be available at `['choices'][0]['message']['content']`
- **Error Handling** Using Model Fallbacks (if `GPT-4` fails, try `llama2`)
-- **Logging** - Log Requests, Responses and Errors to `Supabase`, `Posthog`, `Mixpanel`, `Sentry`, `LLMonitor`,`Athina`, `Helicone` (Any of the supported providers here: https://litellm.readthedocs.io/en/latest/advanced/
+- **Logging** - Log Requests, Responses and Errors to `Supabase`, `Posthog`, `Mixpanel`, `Sentry`, `Lunary`,`Athina`, `Helicone` (Any of the supported providers here: https://litellm.readthedocs.io/en/latest/advanced/

**Example: Logs sent to Supabase**
<img width="1015" alt="Screenshot 2023-08-11 at 4 02 46 PM" src="https://github.com/ishaan-jaff/proxy-server/assets/29436595/237557b8-ba09-4917-982c-8f3e1b2c8d08">
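The model-fallback bullet above relies on the unified call signature: the same `completion(model, messages)` call works for every provider, so a fallback is just a second call. A minimal manual-fallback sketch (model names are examples):

```python
from litellm import completion

messages = [{"role": "user", "content": "Hey, how's it going?"}]

try:
    response = completion(model="gpt-4", messages=messages)
except Exception:
    # same call, different model - the unified format makes the swap trivial
    response = completion(model="ollama/llama2", messages=messages)
```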
@@ -30,13 +30,15 @@ Email us @ krrish@berri.ai
Next Steps 👉 [Call all supported models - e.g. Claude-2, Llama2-70b, etc.](./proxy_api.md#supported-models)

More details 👉
-* [Completion() function details](./completion/)
-* [All supported models / providers on LiteLLM](./providers/)
-* [Build your own OpenAI proxy](https://github.com/BerriAI/liteLLM-proxy/tree/main)
+- [Completion() function details](./completion/)
+- [All supported models / providers on LiteLLM](./providers/)
+- [Build your own OpenAI proxy](https://github.com/BerriAI/liteLLM-proxy/tree/main)

## streaming

Same example from before. Just pass in `stream=True` in the completion args.

```python
from litellm import completion

@@ -56,8 +58,9 @@ print(response)
```

More details 👉
-* [streaming + async](./completion/stream.md)
-* [tutorial for streaming Llama2 on TogetherAI](./tutorials/TogetherAI_liteLLM.md)
+- [streaming + async](./completion/stream.md)
+- [tutorial for streaming Llama2 on TogetherAI](./tutorials/TogetherAI_liteLLM.md)

## exception handling
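LiteLLM maps provider exceptions to OpenAI exception types, so a single `except OpenAIError` handler covers every provider; a minimal sketch (the bad key is deliberate, to force an error):

```python
import os
from openai import OpenAIError
from litellm import completion

os.environ["ANTHROPIC_API_KEY"] = "bad-key"  # force a provider error

try:
    completion(model="claude-instant-1", messages=[{"role": "user", "content": "Hey"}])
except OpenAIError as e:
    print("caught:", e)
```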
@@ -76,25 +79,28 @@ except OpenAIError as e:
```

## Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
-LiteLLM exposes pre defined callbacks to send data to Langfuse, LLMonitor, Helicone, Promptlayer, Traceloop, Slack
+LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack

```python
from litellm import completion

## set env variables for logging tools
+os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
-os.environ["LLMONITOR_APP_ID"] = "your-llmonitor-app-id"

os.environ["OPENAI_API_KEY"]

# set callbacks
-litellm.success_callback = ["langfuse", "llmonitor"] # log input/output to langfuse, llmonitor, supabase
+litellm.success_callback = ["lunary", "langfuse"] # log input/output to langfuse, lunary, supabase

#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
```

More details 👉
-* [exception mapping](./exception_mapping.md)
-* [retries + model fallbacks for completion()](./completion/reliable_completions.md)
-* [tutorial for model fallbacks with completion()](./tutorials/fallbacks.md)
+- [exception mapping](./exception_mapping.md)
+- [retries + model fallbacks for completion()](./completion/reliable_completions.md)
+- [tutorial for model fallbacks with completion()](./tutorials/fallbacks.md)
@@ -5,7 +5,6 @@ import TabItem from '@theme/TabItem';
https://github.com/BerriAI/litellm

## **Call 100+ LLMs using the same Input/Output Format**

- Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints

@@ -14,6 +13,7 @@ https://github.com/BerriAI/litellm
- Track spend & set budgets per project [OpenAI Proxy Server](https://docs.litellm.ai/docs/simple_proxy)

## Basic usage

<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_Getting_Started.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

@@ -21,6 +21,7 @@ https://github.com/BerriAI/litellm
```shell
pip install litellm
```

<Tabs>
<TabItem value="openai" label="OpenAI">

@@ -113,7 +114,6 @@ response = completion(

</TabItem>

<TabItem value="ollama" label="Ollama">

```python

@@ -125,6 +125,7 @@ response = completion(
    api_base="http://localhost:11434"
)
```

</TabItem>
<TabItem value="or" label="Openrouter">

@@ -140,11 +141,13 @@ response = completion(
    messages = [{ "content": "Hello, how are you?","role": "user"}],
)
```

</TabItem>

</Tabs>

## Streaming

Set `stream=True` in the `completion` args.
<Tabs>
<TabItem value="openai" label="OpenAI">

@@ -243,7 +246,6 @@ response = completion(

</TabItem>

<TabItem value="ollama" label="Ollama">

```python

@@ -256,6 +258,7 @@ response = completion(
    stream=True,
)
```

</TabItem>
<TabItem value="or" label="Openrouter">

@@ -272,6 +275,7 @@ response = completion(
    stream=True,
)
```

</TabItem>

</Tabs>
@@ -293,25 +297,28 @@ except OpenAIError as e:
```

## Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
-LiteLLM exposes pre defined callbacks to send data to Langfuse, LLMonitor, Helicone, Promptlayer, Traceloop, Slack
+LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack

```python
from litellm import completion

## set env variables for logging tools
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
-os.environ["LLMONITOR_APP_ID"] = "your-llmonitor-app-id"
+os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"

os.environ["OPENAI_API_KEY"]

# set callbacks
-litellm.success_callback = ["langfuse", "llmonitor"] # log input/output to langfuse, llmonitor, supabase
+litellm.success_callback = ["lunary", "langfuse"] # log input/output to lunary, langfuse, supabase

#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
```

## Track Costs, Usage, Latency for streaming

Use a callback function for this - more info on custom callbacks: https://docs.litellm.ai/docs/observability/custom_callback

```python
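# a minimal sketch of such a callback, assuming the documented
# custom-callback signature (kwargs, completion_response, start_time, end_time)
def track_cost_callback(kwargs, completion_response, start_time, end_time):
    # litellm attaches the computed cost of the (streamed) call to kwargs
    print("streaming response_cost:", kwargs.get("response_cost", 0))

litellm.success_callback = [track_cost_callback]
```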
@@ -351,6 +358,7 @@ Track spend across multiple projects/people
![ui_3](https://github.com/BerriAI/litellm/assets/29436595/47c97d5e-b9be-4839-b28c-43d7f4f10033)

The proxy provides:

1. [Hooks for auth](https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth)
2. [Hooks for logging](https://docs.litellm.ai/docs/proxy/logging#step-1---create-your-custom-litellm-callback-class)
3. [Cost tracking](https://docs.litellm.ai/docs/proxy/virtual_keys#tracking-spend)

@@ -365,6 +373,7 @@ pip install 'litellm[proxy]'
```

#### Step 1: Start litellm proxy

```shell
$ litellm --model huggingface/bigcode/starcoder

@@ -372,6 +381,7 @@ $ litellm --model huggingface/bigcode/starcoder
```

#### Step 2: Make ChatCompletions Request to Proxy

```python
import openai # openai v1.0.0+
client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:8000") # set proxy to base_url

@@ -387,6 +397,7 @@ print(response)
```

## More details
-* [exception mapping](./exception_mapping.md)
-* [retries + model fallbacks for completion()](./completion/reliable_completions.md)
-* [proxy virtual keys & spend management](./tutorials/fallbacks.md)
+- [exception mapping](./exception_mapping.md)
+- [retries + model fallbacks for completion()](./completion/reliable_completions.md)
+- [proxy virtual keys & spend management](./tutorials/fallbacks.md)
@@ -7,7 +7,7 @@ liteLLM provides `input_callbacks`, `success_callbacks` and `failure_callbacks`,
liteLLM supports:

- [Custom Callback Functions](https://docs.litellm.ai/docs/observability/custom_callback)
-- [LLMonitor](https://llmonitor.com/docs)
+- [Lunary](https://lunary.ai/docs)
- [Helicone](https://docs.helicone.ai/introduction)
- [Traceloop](https://traceloop.com/docs)
- [Athina](https://docs.athina.ai/)

@@ -22,15 +22,15 @@ from litellm import completion

# set callbacks
litellm.input_callback=["sentry"] # for sentry breadcrumbing - logs the input being sent to the api
-litellm.success_callback=["posthog", "helicone", "llmonitor", "athina"]
-litellm.failure_callback=["sentry", "llmonitor"]
+litellm.success_callback=["posthog", "helicone", "lunary", "athina"]
+litellm.failure_callback=["sentry", "lunary"]

## set env variables
os.environ['SENTRY_DSN'], os.environ['SENTRY_API_TRACE_RATE']= ""
os.environ['POSTHOG_API_KEY'], os.environ['POSTHOG_API_URL'] = "api-key", "api-url"
os.environ["HELICONE_API_KEY"] = ""
os.environ["TRACELOOP_API_KEY"] = ""
-os.environ["LLMONITOR_APP_ID"] = ""
+os.environ["LUNARY_PUBLIC_KEY"] = ""
os.environ["ATHINA_API_KEY"] = ""

response = completion(model="gpt-3.5-turbo", messages=messages)
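Custom callback functions use the same registration hook as the built-in string callbacks listed above; a minimal sketch (the function name is illustrative, and the assumption that callables and string callbacks can sit in the same list follows the custom-callback docs):

```python
import litellm

def log_latency(kwargs, completion_response, start_time, end_time):
    # runs after every successful call, alongside the built-in integrations
    print(f"{kwargs.get('model')} took {(end_time - start_time).total_seconds():.2f}s")

litellm.success_callback = [log_latency, "lunary"]
```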
@@ -1,65 +0,0 @@
# LLMonitor Tutorial

[LLMonitor](https://llmonitor.com/) is an open-source observability platform that provides cost tracking, user tracking and powerful agent tracing.

<video controls width='900' >
  <source src='https://llmonitor.com/videos/demo-annotated.mp4'/>
</video>

## Use LLMonitor to log requests across all LLM Providers (OpenAI, Azure, Anthropic, Cohere, Replicate, PaLM)

liteLLM provides `callbacks`, making it easy for you to log data depending on the status of your responses.

:::info
We want to learn how we can make the callbacks better! Meet the [founders](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) or
join our [discord](https://discord.gg/wuPM9dRgDw)
:::

### Using Callbacks

First, sign up to get an app ID on the [LLMonitor dashboard](https://llmonitor.com).

Use just 2 lines of code, to instantly log your responses **across all providers** with llmonitor:

```python
litellm.success_callback = ["llmonitor"]
litellm.failure_callback = ["llmonitor"]
```

Complete code

```python
from litellm import completion

## set env variables
os.environ["LLMONITOR_APP_ID"] = "your-llmonitor-app-id"
# Optional: os.environ["LLMONITOR_API_URL"] = "self-hosting-url"

os.environ["OPENAI_API_KEY"], os.environ["COHERE_API_KEY"] = "", ""

# set callbacks
litellm.success_callback = ["llmonitor"]
litellm.failure_callback = ["llmonitor"]

#openai call
response = completion(
  model="gpt-3.5-turbo",
  messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
  user="ishaan_litellm"
)

#cohere call
response = completion(
  model="command-nightly",
  messages=[{"role": "user", "content": "Hi 👋 - i'm cohere"}],
  user="ishaan_litellm"
)
```

## Support & Talk to Founders

- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
- Meet the LLMonitor team on [Discord](http://discord.com/invite/8PafSG58kK) or via [email](mailto:vince@llmonitor.com).
docs/my-website/docs/observability/lunary_integration.md (new file, 82 lines)
@@ -0,0 +1,82 @@
# Lunary - Logging and tracing LLM input/output

[Lunary](https://lunary.ai/) is an open-source AI developer platform providing observability, prompt management, and evaluation tools.

<video controls width='900' >
  <source src='https://lunary.ai/videos/demo-annotated.mp4'/>
</video>

## Use Lunary to log requests across all LLM Providers (OpenAI, Azure, Anthropic, Cohere, Replicate, PaLM)

liteLLM provides `callbacks`, making it easy for you to log data depending on the status of your responses.

:::info
We want to learn how we can make the callbacks better! Meet the [founders](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) or
join our [discord](https://discord.gg/wuPM9dRgDw)
:::

### Using Callbacks

First, sign up to get a public key on the [Lunary dashboard](https://lunary.ai).

Use just 2 lines of code to instantly log your responses **across all providers** with Lunary:

```python
litellm.success_callback = ["lunary"]
litellm.failure_callback = ["lunary"]
```

Complete code:

```python
from litellm import completion

## set env variables
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"

os.environ["OPENAI_API_KEY"] = ""

# set callbacks
litellm.success_callback = ["lunary"]
litellm.failure_callback = ["lunary"]

#openai call
response = completion(
  model="gpt-3.5-turbo",
  messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
  user="ishaan_litellm"
)
```

## Templates

You can use Lunary to manage prompt templates and use them across all your LLM providers.

Make sure to have `lunary` installed:

```bash
pip install lunary
```

Then, use the following code to pull templates from Lunary:

```python
from litellm import completion
import lunary

template = lunary.render_template("template-slug", {
  "name": "John", # Inject variables
})

litellm.success_callback = ["lunary"]

result = completion(**template)
```

## Support & Talk to Founders

- Meet the Lunary team via [email](mailto:hello@lunary.ai).
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
@@ -22,18 +22,18 @@ const sidebars = {
      type: "category",
      label: "💥 OpenAI Proxy Server",
      link: {
-       type: 'generated-index',
-       title: '💥 OpenAI Proxy Server',
+       type: "generated-index",
+       title: "💥 OpenAI Proxy Server",
        description: `Proxy Server to call 100+ LLMs in a unified interface & track spend, set budgets per virtual key/user`,
-       slug: '/simple_proxy',
+       slug: "/simple_proxy",
      },
      items: [
        "proxy/quick_start",
        "proxy/configs",
        {
-         type: 'link',
-         label: '📖 All Endpoints',
-         href: 'https://litellm-api.up.railway.app/',
+         type: "link",
+         label: "📖 All Endpoints",
+         href: "https://litellm-api.up.railway.app/",
        },
        "proxy/enterprise",
        "proxy/user_keys",
@@ -45,43 +45,33 @@ const sidebars = {
        "proxy/debugging",
        "proxy/pii_masking",
        {
-         "type": "category",
-         "label": "🔥 Load Balancing",
-         "items": [
-           "proxy/load_balancing",
-           "proxy/reliability",
-         ]
+         type: "category",
+         label: "🔥 Load Balancing",
+         items: ["proxy/load_balancing", "proxy/reliability"],
        },
        "proxy/caching",
        {
-         "type": "category",
-         "label": "Logging, Alerting",
-         "items": [
-           "proxy/logging",
-           "proxy/alerting",
-           "proxy/streaming_logging",
-         ]
+         type: "category",
+         label: "Logging, Alerting",
+         items: ["proxy/logging", "proxy/alerting", "proxy/streaming_logging"],
        },
        {
-         "type": "category",
-         "label": "Content Moderation",
-         "items": [
-           "proxy/call_hooks",
-           "proxy/rules",
-         ]
+         type: "category",
+         label: "Content Moderation",
+         items: ["proxy/call_hooks", "proxy/rules"],
        },
        "proxy/deploy",
        "proxy/cli",
-     ]
+     ],
    },
    {
      type: "category",
      label: "Completion()",
      link: {
-       type: 'generated-index',
-       title: 'Completion()',
-       description: 'Details on the completion() function',
-       slug: '/completion',
+       type: "generated-index",
+       title: "Completion()",
+       description: "Details on the completion() function",
+       slug: "/completion",
      },
      items: [
        "completion/input",
@@ -104,17 +94,18 @@ const sidebars = {
        "embedding/supported_embedding",
        "embedding/async_embedding",
        "embedding/moderation",
-       "image_generation"
+       "image_generation",
      ],
    },
    {
      type: "category",
      label: "Supported Models & Providers",
      link: {
-       type: 'generated-index',
-       title: 'Providers',
-       description: 'Learn how to deploy + call models from different providers on LiteLLM',
-       slug: '/providers',
+       type: "generated-index",
+       title: "Providers",
+       description:
+         "Learn how to deploy + call models from different providers on LiteLLM",
+       slug: "/providers",
      },
      items: [
        "providers/openai",

@@ -148,7 +139,7 @@ const sidebars = {
        "providers/openrouter",
        "providers/custom_openai_proxy",
        "providers/petals",
-     ]
+     ],
    },
    "proxy/custom_pricing",
    "routing",
@@ -163,9 +154,10 @@ const sidebars = {
      type: "category",
      label: "Logging & Observability",
      items: [
-       'debugging/local_debugging',
+       "debugging/local_debugging",
        "observability/callbacks",
        "observability/custom_callback",
+       "observability/lunary_integration",
        "observability/langfuse_integration",
        "observability/sentry",
        "observability/promptlayer_integration",

@@ -174,7 +166,6 @@ const sidebars = {
        "observability/slack_integration",
        "observability/traceloop_integration",
        "observability/athina_integration",
-       "observability/llmonitor_integration",
        "observability/helicone_integration",
        "observability/supabase_integration",
        `observability/telemetry`,

@@ -182,18 +173,18 @@ const sidebars = {
    },
    "caching/redis_cache",
    {
-     type: 'category',
-     label: 'Tutorials',
+     type: "category",
+     label: "Tutorials",
      items: [
-       'tutorials/azure_openai',
-       'tutorials/oobabooga',
+       "tutorials/azure_openai",
+       "tutorials/oobabooga",
        "tutorials/gradio_integration",
-       'tutorials/huggingface_codellama',
-       'tutorials/huggingface_tutorial',
-       'tutorials/TogetherAI_liteLLM',
-       'tutorials/finetuned_chat_gpt',
-       'tutorials/sagemaker_llms',
-       'tutorials/text_completion',
+       "tutorials/huggingface_codellama",
+       "tutorials/huggingface_tutorial",
+       "tutorials/TogetherAI_liteLLM",
+       "tutorials/finetuned_chat_gpt",
+       "tutorials/sagemaker_llms",
+       "tutorials/text_completion",
        "tutorials/first_playground",
        "tutorials/model_fallbacks",
      ],
@@ -201,24 +192,23 @@ const sidebars = {
    {
      type: "category",
      label: "LangChain, LlamaIndex Integration",
-     items: [
-       "langchain/langchain"
-     ],
+     items: ["langchain/langchain"],
    },
    {
-     type: 'category',
-     label: 'Extras',
+     type: "category",
+     label: "Extras",
      items: [
-       'extras/contributing',
+       "extras/contributing",
        "proxy_server",
        {
          type: "category",
          label: "❤️ 🚅 Projects built on LiteLLM",
          link: {
-           type: 'generated-index',
-           title: 'Projects built on LiteLLM',
-           description: 'Learn how to deploy + call models from different providers on LiteLLM',
-           slug: '/project',
+           type: "generated-index",
+           title: "Projects built on LiteLLM",
+           description:
+             "Learn how to deploy + call models from different providers on LiteLLM",
+           slug: "/project",
          },
          items: [
            "projects/Docq.AI",

@@ -234,7 +224,7 @@ const sidebars = {
            "projects/GPT Migrate",
            "projects/YiVal",
            "projects/LiteLLM Proxy",
-         ]
+         ],
        },
      ],
    },
@@ -5,7 +5,6 @@ import TabItem from '@theme/TabItem';
https://github.com/BerriAI/litellm

## **Call 100+ LLMs using the same Input/Output Format**

- Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints

@@ -14,6 +13,7 @@ https://github.com/BerriAI/litellm
- Track spend & set budgets per project [OpenAI Proxy Server](https://docs.litellm.ai/docs/simple_proxy)

## Basic usage

<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/liteLLM_Getting_Started.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

@@ -21,6 +21,7 @@ https://github.com/BerriAI/litellm
```shell
pip install litellm
```

<Tabs>
<TabItem value="openai" label="OpenAI">

@@ -113,7 +114,6 @@ response = completion(

</TabItem>

<TabItem value="ollama" label="Ollama">

```python

@@ -125,6 +125,7 @@ response = completion(
    api_base="http://localhost:11434"
)
```

</TabItem>
<TabItem value="or" label="Openrouter">

@@ -140,11 +141,13 @@ response = completion(
    messages = [{ "content": "Hello, how are you?","role": "user"}],
)
```

</TabItem>

</Tabs>

## Streaming

Set `stream=True` in the `completion` args.
<Tabs>
<TabItem value="openai" label="OpenAI">

@@ -243,7 +246,6 @@ response = completion(

</TabItem>

<TabItem value="ollama" label="Ollama">

```python

@@ -256,6 +258,7 @@ response = completion(
    stream=True,
)
```

</TabItem>
<TabItem value="or" label="Openrouter">

@@ -272,6 +275,7 @@ response = completion(
    stream=True,
)
```

</TabItem>

</Tabs>
@@ -293,25 +297,28 @@ except OpenAIError as e:
```

## Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
-LiteLLM exposes pre defined callbacks to send data to Langfuse, LLMonitor, Helicone, Promptlayer, Traceloop, Slack
+LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack

```python
from litellm import completion

## set env variables for logging tools
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
os.environ["LANGFUSE_SECRET_KEY"] = ""
-os.environ["LLMONITOR_APP_ID"] = "your-llmonitor-app-id"
+os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"

os.environ["OPENAI_API_KEY"]

# set callbacks
-litellm.success_callback = ["langfuse", "llmonitor"] # log input/output to langfuse, llmonitor, supabase
+litellm.success_callback = ["langfuse", "lunary"] # log input/output to lunary, langfuse, supabase

#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
```

## Track Costs, Usage, Latency for streaming

Use a callback function for this - more info on custom callbacks: https://docs.litellm.ai/docs/observability/custom_callback

```python
@@ -351,6 +358,7 @@ Track spend across multiple projects/people
![ui_3](https://github.com/BerriAI/litellm/assets/29436595/47c97d5e-b9be-4839-b28c-43d7f4f10033)

The proxy provides:

1. [Hooks for auth](https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth)
2. [Hooks for logging](https://docs.litellm.ai/docs/proxy/logging#step-1---create-your-custom-litellm-callback-class)
3. [Cost tracking](https://docs.litellm.ai/docs/proxy/virtual_keys#tracking-spend)

@@ -365,6 +373,7 @@ pip install 'litellm[proxy]'
```

#### Step 1: Start litellm proxy

```shell
$ litellm --model huggingface/bigcode/starcoder

@@ -372,6 +381,7 @@ $ litellm --model huggingface/bigcode/starcoder
```

#### Step 2: Make ChatCompletions Request to Proxy

```python
import openai # openai v1.0.0+
client = openai.OpenAI(api_key="anything",base_url="http://0.0.0.0:8000") # set proxy to base_url

@@ -387,6 +397,7 @@ print(response)
```

## More details
-* [exception mapping](./exception_mapping.md)
-* [retries + model fallbacks for completion()](./completion/reliable_completions.md)
-* [proxy virtual keys & spend management](./tutorials/fallbacks.md)
+- [exception mapping](./exception_mapping.md)
+- [retries + model fallbacks for completion()](./completion/reliable_completions.md)
+- [proxy virtual keys & spend management](./tutorials/fallbacks.md)
@@ -6,7 +6,7 @@ liteLLM provides `success_callbacks` and `failure_callbacks`, making it easy for

liteLLM supports:

-- [LLMonitor](https://llmonitor.com/docs)
+- [Lunary](https://lunary.ai/docs)
- [Helicone](https://docs.helicone.ai/introduction)
- [Sentry](https://docs.sentry.io/platforms/python/)
- [PostHog](https://posthog.com/docs/libraries/python)

@@ -18,8 +18,8 @@ liteLLM supports:
from litellm import completion

# set callbacks
-litellm.success_callback=["posthog", "helicone", "llmonitor"]
-litellm.failure_callback=["sentry", "llmonitor"]
+litellm.success_callback=["posthog", "helicone", "lunary"]
+litellm.failure_callback=["sentry", "lunary"]

## set env variables
os.environ['SENTRY_DSN'], os.environ['SENTRY_API_TRACE_RATE']= ""
@@ -1,127 +0,0 @@
#### What this does ####
# On success + failure, log events to aispend.io
import datetime
import traceback
import dotenv
import os
import requests

dotenv.load_dotenv()  # Loading env variables using dotenv


# convert to {completion: xx, tokens: xx}
def parse_usage(usage):
    return {
        "completion": usage["completion_tokens"] if "completion_tokens" in usage else 0,
        "prompt": usage["prompt_tokens"] if "prompt_tokens" in usage else 0,
    }


def parse_messages(input):
    if input is None:
        return None

    def clean_message(message):
        # if is strin, return as is
        if isinstance(message, str):
            return message

        if "message" in message:
            return clean_message(message["message"])
        text = message["content"]
        if text == None:
            text = message.get("function_call", None)

        return {
            "role": message["role"],
            "text": text,
        }

    if isinstance(input, list):
        if len(input) == 1:
            return clean_message(input[0])
        else:
            return [clean_message(msg) for msg in input]
    else:
        return clean_message(input)


class LLMonitorLogger:
    # Class variables or attributes
    def __init__(self):
        # Instance variables
        self.api_url = os.getenv("LLMONITOR_API_URL") or "https://app.llmonitor.com"
        self.app_id = os.getenv("LLMONITOR_APP_ID")

    def log_event(
        self,
        type,
        event,
        run_id,
        model,
        print_verbose,
        input=None,
        user_id=None,
        response_obj=None,
        start_time=datetime.datetime.now(),
        end_time=datetime.datetime.now(),
        error=None,
    ):
        # Method definition
        try:
            print_verbose(f"LLMonitor Logging - Logging request for model {model}")

            if response_obj:
                usage = (
                    parse_usage(response_obj["usage"])
                    if "usage" in response_obj
                    else None
                )
                output = response_obj["choices"] if "choices" in response_obj else None
            else:
                usage = None
                output = None

            if error:
                error_obj = {"stack": error}

            else:
                error_obj = None

            data = [
                {
                    "type": type,
                    "name": model,
                    "runId": run_id,
                    "app": self.app_id,
                    "event": "start",
                    "timestamp": start_time.isoformat(),
                    "userId": user_id,
                    "input": parse_messages(input),
                },
                {
                    "type": type,
                    "runId": run_id,
                    "app": self.app_id,
                    "event": event,
                    "error": error_obj,
                    "timestamp": end_time.isoformat(),
                    "userId": user_id,
                    "output": parse_messages(output),
                    "tokensUsage": usage,
                },
            ]

            print_verbose(f"LLMonitor Logging - final data object: {data}")

            response = requests.post(
                self.api_url + "/api/report",
                headers={"Content-Type": "application/json"},
                json={"events": data},
            )

            print_verbose(f"LLMonitor Logging - response: {response}")
        except:
            # traceback.print_exc()
            print_verbose(f"LLMonitor Logging Error - {traceback.format_exc()}")
            pass
litellm/integrations/lunary.py (new file, 167 lines)
@@ -0,0 +1,167 @@
#### What this does ####
# On success + failure, log events to Lunary
import datetime
import traceback
import dotenv
import os
import requests

dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback
import datetime, subprocess, sys
import litellm


# convert to {completion: xx, tokens: xx}
def parse_usage(usage):
    return {
        "completion": usage["completion_tokens"] if "completion_tokens" in usage else 0,
        "prompt": usage["prompt_tokens"] if "prompt_tokens" in usage else 0,
    }


def parse_messages(input):
    if input is None:
        return None

    def clean_message(message):
        # if it's a string, return as-is
        if isinstance(message, str):
            return message

        if "message" in message:
            return clean_message(message["message"])

        serialized = {
            "role": message.get("role"),
            "content": message.get("content"),
        }

        # Only add tool_calls and function_call to serialized if they are set
        if message.get("tool_calls"):
            serialized["tool_calls"] = message.get("tool_calls")
        if message.get("function_call"):
            serialized["function_call"] = message.get("function_call")

        return serialized

    if isinstance(input, list):
        if len(input) == 1:
            return clean_message(input[0])
        else:
            return [clean_message(msg) for msg in input]
    else:
        return clean_message(input)


class LunaryLogger:
    # Class variables or attributes
    def __init__(self):
        try:
            import lunary

            # guard: if the installed lunary package doesn't expose
            # track_event, treat it as not installed
            if not hasattr(lunary, "track_event"):
                raise ImportError

            self.lunary_client = lunary
        except ImportError:
            print("Lunary not installed. Installing now...")
            subprocess.check_call([sys.executable, "-m", "pip", "install", "lunary", "--upgrade"])
            import importlib
            import lunary

            importlib.reload(lunary)

            self.lunary_client = lunary

    def log_event(
        self,
        kwargs,
        type,
        event,
        run_id,
        model,
        print_verbose,
        extra=None,
        input=None,
        user_id=None,
        response_obj=None,
        start_time=datetime.datetime.now(),
        end_time=datetime.datetime.now(),
        error=None,
    ):
        # Method definition
        try:
            print_verbose(f"Lunary Logging - Logging request for model {model}")

            litellm_params = kwargs.get("litellm_params", {})
            metadata = (
                litellm_params.get("metadata", {}) or {}
            )

            tags = litellm_params.pop("tags", None) or []

            extra = extra or {}
            template_id = extra.pop("templateId", None)

            for param, value in extra.items():
                if not isinstance(value, (str, int, bool, float)):
                    try:
                        extra[param] = str(value)
                    except:
                        # if casting value to str fails don't block logging
                        pass

            if response_obj:
                usage = (
                    parse_usage(response_obj["usage"])
                    if "usage" in response_obj
                    else None
                )
                output = response_obj["choices"] if "choices" in response_obj else None
            else:
                usage = None
                output = None

            if error:
                error_obj = {"stack": error}
            else:
                error_obj = None

            print(start_time.isoformat())

            self.lunary_client.track_event(
                type,
                "start",
                run_id,
                user_id=user_id,
                name=model,
                input=parse_messages(input),
                timestamp=start_time.isoformat(),
                # template_id=template_id,
                metadata=metadata,
                runtime="litellm",
                tags=tags,
                extra=extra,
                # user_props=user_props,
            )

            self.lunary_client.track_event(
                type,
                event,
                run_id,
                timestamp=end_time.isoformat(),
                runtime="litellm",
                error=error_obj,
                output=parse_messages(output),
                # parse_usage() already normalized the keys to prompt/completion
                token_usage={
                    "prompt": usage.get("prompt") if usage else None,
                    "completion": usage.get("completion") if usage else None,
                },
            )

        except:
            # traceback.print_exc()
            print_verbose(f"Lunary Logging Error - {traceback.format_exc()}")
            pass
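For orientation (not part of the commit), a hedged sketch of exercising the new logger directly. The run id, user id, metadata, and response dict are made-up illustrations of the shapes `log_event` expects, based on the signature above; the `lunary` package is assumed to be installed (otherwise the constructor will try to pip-install it):

```python
# Illustrative only: call LunaryLogger.log_event with a hand-built,
# OpenAI-style response dict. All concrete values are made up.
import datetime
from litellm.integrations.lunary import LunaryLogger

logger = LunaryLogger()

fake_response = {
    "choices": [{"message": {"role": "assistant", "content": "Hello!"}}],
    "usage": {"prompt_tokens": 9, "completion_tokens": 3},
}

logger.log_event(
    kwargs={"litellm_params": {"metadata": {"project_name": "demo"}}},
    type="llm",
    event="end",
    run_id="run-123",                      # illustrative id
    model="gpt-3.5-turbo",
    print_verbose=print,
    extra={"temperature": 0.2},            # forwarded as extra params
    input=[{"role": "user", "content": "Say hello"}],
    user_id="demo-user",
    response_obj=fake_response,
    start_time=datetime.datetime.now(),
    end_time=datetime.datetime.now(),
)
```

Inside LiteLLM itself this call is made from the `Logging` success/failure handlers shown in the hunks further below, which pass `self.model_call_details` as `kwargs` and the optional params as `extra`.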
@@ -1,76 +0,0 @@
# #### What this tests ####
# # This tests if logging to the llmonitor integration actually works
# # Adds the parent directory to the system path
# import sys
# import os

# sys.path.insert(0, os.path.abspath("../.."))

# from litellm import completion, embedding
# import litellm

# litellm.success_callback = ["llmonitor"]
# litellm.failure_callback = ["llmonitor"]

# litellm.set_verbose = True


# def test_chat_openai():
#     try:
#         response = completion(
#             model="gpt-3.5-turbo",
#             messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
#             user="ishaan_from_litellm"
#         )

#         print(response)

#     except Exception as e:
#         print(e)


# def test_embedding_openai():
#     try:
#         response = embedding(model="text-embedding-ada-002", input=["test"])
#         # Add any assertions here to check the response
#         print(f"response: {str(response)[:50]}")
#     except Exception as e:
#         print(e)


# test_chat_openai()
# # test_embedding_openai()


# def test_llmonitor_logging_function_calling():
#     function1 = [
#         {
#             "name": "get_current_weather",
#             "description": "Get the current weather in a given location",
#             "parameters": {
#                 "type": "object",
#                 "properties": {
#                     "location": {
#                         "type": "string",
#                         "description": "The city and state, e.g. San Francisco, CA",
#                     },
#                     "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
#                 },
#                 "required": ["location"],
#             },
#         }
#     ]
#     try:
#         response = completion(model="gpt-3.5-turbo",
#                               messages=[{
#                                   "role": "user",
#                                   "content": "what's the weather in boston"
#                               }],
#                               temperature=0.1,
#                               functions=function1,
#                               )
#         print(response)
#     except Exception as e:
#         print(e)

# # test_llmonitor_logging_function_calling()
litellm/tests/test_lunary.py (new file, 70 lines)
@@ -0,0 +1,70 @@
import sys
import os
import io

sys.path.insert(0, os.path.abspath("../.."))

from litellm import completion
import litellm

litellm.success_callback = ["lunary"]
litellm.set_verbose = True
import time


def test_lunary_logging():
    try:
        response = completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "what llm are u"}],
            max_tokens=10,
            temperature=0.2,
        )
        print(response)
    except Exception as e:
        print(e)


test_lunary_logging()


def test_lunary_logging_with_metadata():
    try:
        response = completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "what llm are u"}],
            max_tokens=10,
            temperature=0.2,
            metadata={
                "run_name": "litellmRUN",
                "project_name": "litellm-completion",
            },
        )
        print(response)
    except Exception as e:
        print(e)


# test_lunary_logging_with_metadata()


def test_lunary_logging_with_streaming_and_metadata():
    try:
        response = completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "what llm are u"}],
            max_tokens=10,
            temperature=0.2,
            metadata={
                "run_name": "litellmRUN",
                "project_name": "litellm-completion",
            },
            stream=True,
        )
        for chunk in response:
            continue
    except Exception as e:
        print(e)


# test_lunary_logging_with_streaming_and_metadata()
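The tests above are print-only smoke tests. A hypothetical pytest-style variant (not in this commit) that fails loudly and asserts on the completion result could look like the sketch below; it assumes an `OPENAI_API_KEY` is available:

```python
# Hypothetical, assertion-based variant of test_lunary_logging (not part
# of the commit): let exceptions propagate and check the completion text.
import litellm
from litellm import completion

litellm.success_callback = ["lunary"]


def test_lunary_logging_asserts():
    response = completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "what llm are u"}],
        max_tokens=10,
        temperature=0.2,
    )
    # The Lunary callback is fired in the background; here we only verify
    # the completion itself returned content.
    assert response["choices"][0]["message"]["content"]
```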
@@ -60,7 +60,7 @@ from .integrations.helicone import HeliconeLogger
 from .integrations.aispend import AISpendLogger
 from .integrations.berrispend import BerriSpendLogger
 from .integrations.supabase import Supabase
-from .integrations.llmonitor import LLMonitorLogger
+from .integrations.lunary import LunaryLogger
 from .integrations.prompt_layer import PromptLayerLogger
 from .integrations.langsmith import LangsmithLogger
 from .integrations.weights_biases import WeightsBiasesLogger

@@ -126,7 +126,7 @@ dynamoLogger = None
 s3Logger = None
 genericAPILogger = None
 clickHouseLogger = None
-llmonitorLogger = None
+lunaryLogger = None
 aispendLogger = None
 berrispendLogger = None
 supabaseClient = None

@@ -788,7 +788,7 @@ class CallTypes(Enum):

 # Logging function -> log the exact model details + what's being sent | Non-BlockingP
 class Logging:
-    global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, capture_exception, add_breadcrumb, llmonitorLogger
+    global supabaseClient, liteDebuggerClient, promptLayerLogger, weightsBiasesLogger, langsmithLogger, capture_exception, add_breadcrumb, lunaryLogger

     def __init__(
         self,

@@ -1327,27 +1327,28 @@ class Logging:
                 end_time=end_time,
                 print_verbose=print_verbose,
             )
-        if callback == "llmonitor":
-            print_verbose("reaches llmonitor for logging!")
+        if callback == "lunary":
+            print_verbose("reaches lunary for logging!")
             model = self.model

             input = self.model_call_details.get(
                 "messages", self.model_call_details.get("input", None)
             )

-            # if contains input, it's 'embedding', otherwise 'llm'
             type = (
                 "embed"
                 if self.call_type == CallTypes.embedding.value
                 else "llm"
             )

-            llmonitorLogger.log_event(
+            lunaryLogger.log_event(
                 type=type,
+                kwargs=self.model_call_details,
                 event="end",
                 model=model,
                 input=input,
                 user_id=self.model_call_details.get("user", "default"),
+                extra=self.model_call_details.get("optional_params", {}),
                 response_obj=result,
                 start_time=start_time,
                 end_time=end_time,

@@ -1842,8 +1843,8 @@ class Logging:
                 call_type=self.call_type,
                 stream=self.stream,
             )
-        elif callback == "llmonitor":
-            print_verbose("reaches llmonitor for logging error!")
+        elif callback == "lunary":
+            print_verbose("reaches lunary for logging error!")

             model = self.model

@@ -1855,7 +1856,7 @@ class Logging:
                 else "llm"
             )

-            llmonitorLogger.log_event(
+            lunaryLogger.log_event(
                 type=_type,
                 event="error",
                 user_id=self.model_call_details.get("user", "default"),

@@ -5593,7 +5594,7 @@ def validate_environment(model: Optional[str] = None) -> dict:


 def set_callbacks(callback_list, function_id=None):
-    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, llmonitorLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, dynamoLogger, s3Logger
+    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, traceloopLogger, athinaLogger, heliconeLogger, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger, promptLayerLogger, langFuseLogger, customLogger, weightsBiasesLogger, langsmithLogger, dynamoLogger, s3Logger
     try:
         for callback in callback_list:
             print_verbose(f"callback: {callback}")

@@ -5653,8 +5654,8 @@ def set_callbacks(callback_list, function_id=None):
                 print_verbose("Initialized Athina Logger")
             elif callback == "helicone":
                 heliconeLogger = HeliconeLogger()
-            elif callback == "llmonitor":
-                llmonitorLogger = LLMonitorLogger()
+            elif callback == "lunary":
+                lunaryLogger = LunaryLogger()
             elif callback == "promptlayer":
                 promptLayerLogger = PromptLayerLogger()
             elif callback == "langfuse":

@@ -5692,7 +5693,7 @@ def set_callbacks(callback_list, function_id=None):

 # NOTE: DEPRECATING this in favor of using failure_handler() in Logging:
 def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs):
-    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, llmonitorLogger
+    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, aispendLogger, berrispendLogger, supabaseClient, liteDebuggerClient, lunaryLogger
     try:
         # print_verbose(f"handle_failure args: {args}")
         # print_verbose(f"handle_failure kwargs: {kwargs}")