Merge branch 'main' into litellm_perf_fix

commit 2e81b98846

10 changed files with 116 additions and 75 deletions
@@ -41,7 +41,7 @@ RUN pip install redisvl==0.0.7 --no-deps
 # ensure pyjwt is used, not jwt
 RUN pip uninstall jwt -y
 RUN pip uninstall PyJWT -y
-RUN pip install PyJWT --no-cache-dir
+RUN pip install PyJWT==2.9.0 --no-cache-dir
 
 # Build Admin UI
 RUN chmod +x build_admin_ui.sh && ./build_admin_ui.sh

@@ -36,8 +36,8 @@ RUN pip wheel --no-cache-dir --wheel-dir=/wheels/ -r requirements.txt
 # Runtime stage
 FROM $LITELLM_RUNTIME_IMAGE AS runtime
 
-# Update dependencies and clean up - handles debian security issue
-RUN apt-get update && apt-get upgrade -y && rm -rf /var/lib/apt/lists/*
+# Update dependencies and clean up
+RUN apk update && apk upgrade && rm -rf /var/cache/apk/*
 
 WORKDIR /app
 

@@ -59,7 +59,7 @@ RUN pip install redisvl==0.0.7 --no-deps
 # ensure pyjwt is used, not jwt
 RUN pip uninstall jwt -y
 RUN pip uninstall PyJWT -y
-RUN pip install PyJWT --no-cache-dir
+RUN pip install PyJWT==2.9.0 --no-cache-dir
 
 # Build Admin UI
 RUN chmod +x build_admin_ui.sh && ./build_admin_ui.sh

@@ -59,7 +59,7 @@ RUN pip install redisvl==0.0.7 --no-deps
 # ensure pyjwt is used, not jwt
 RUN pip uninstall jwt -y
 RUN pip uninstall PyJWT -y
-RUN pip install PyJWT --no-cache-dir
+RUN pip install PyJWT==2.9.0 --no-cache-dir
 
 # Build Admin UI
 RUN chmod +x build_admin_ui.sh && ./build_admin_ui.sh

@@ -2,24 +2,56 @@ import Image from '@theme/IdealImage';
 
 # 🪢 Langfuse - Logging LLM Input/Output
 
-LangFuse is open Source Observability & Analytics for LLM Apps
-Detailed production traces and a granular view on quality, cost and latency
+## What is Langfuse?
 
 <Image img={require('../../img/langfuse.png')} />
 
+Langfuse ([GitHub](https://github.com/langfuse/langfuse)) is an open-source LLM engineering platform for model [tracing](https://langfuse.com/docs/tracing), [prompt management](https://langfuse.com/docs/prompts/get-started), and application [evaluation](https://langfuse.com/docs/scores/overview). Langfuse helps teams to collaboratively debug, analyze, and iterate on their LLM applications.
+
+## Monitoring LiteLLM with Langfuse
+
+You can integrate LiteLLM with Langfuse in three different ways:
+
+1. Using the LiteLLM Proxy with the OpenAI SDK Wrapper. This proxy standardizes over 100 models to the OpenAI API schema, and the Langfuse OpenAI SDK wrapper instruments the LLM calls.
+2. Enabling logging in the LiteLLM Proxy through the UI to send logs to Langfuse.
+3. Configuring the LiteLLM Python SDK to send logs to Langfuse by setting the appropriate environment variables.
+
+Example trace in Langfuse using multiple models via LiteLLM:
+
+<Image img={require('../../img/langfuse-example-trace-multiple-models-min')} />
+
+## 1. LiteLLM Proxy + Langfuse OpenAI SDK Wrapper
+
 :::info
-We want to learn how we can make the callbacks better! Meet the LiteLLM [founders](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version) or
-join our [discord](https://discord.gg/wuPM9dRgDw)
-:::
+This is the recommended method to integrate LiteLLM with Langfuse. The Langfuse OpenAI SDK wrapper automatically records token counts, latencies, streaming response times (time to first token), API errors, and more.
+:::
 
-## Pre-Requisites
+**How this works:**
+
+The [LiteLLM Proxy](https://docs.litellm.ai/docs/simple_proxy) standardizes 100+ models on the OpenAI API schema
+and the Langfuse OpenAI SDK wrapper ([Python](https://langfuse.com/docs/integrations/openai/python), [JS/TS](https://langfuse.com/docs/integrations/openai/js)) instruments the LLM calls.
+
+To see a full end-to-end example, check out the LiteLLM cookbook:
+
+- [Python Cookbook](https://langfuse.com/docs/integrations/litellm/example-proxy-python)
+- [JS/TS Cookbook](https://langfuse.com/docs/integrations/litellm/example-proxy-js)
+
+## 2. Send Logs from LiteLLM Proxy to Langfuse
+
+By setting the callback to Langfuse in the LiteLLM UI you can instantly log your responses across all providers. For more information on how to setup the Proxy UI, see the [LiteLLM docs](../proxy/ui).
+
+<Image img={require('../../img/langfuse-litellm-ui.png')} />
+
+## 3. LiteLLM Python SDK
+
+### Pre-Requisites
 Ensure you have run `pip install langfuse` for this integration
 ```shell
 pip install langfuse>=2.0.0 litellm
 ```
 
-## Quick Start
-Use just 2 lines of code, to instantly log your responses **across all providers** with Langfuse
+### Quick Start
+Use just 2 lines of code, to instantly log your responses **across all providers** with Langfuse:
 
 <a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/logging_observability/LiteLLM_Langfuse.ipynb">
     <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
 </a>

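As a hedged companion to integration path 1 described in the hunk above (not part of the commit itself): assuming a LiteLLM Proxy is running at `http://localhost:4000` with a model alias `gpt-4o` configured on it, the Langfuse OpenAI SDK wrapper can be pointed at the proxy like this; keys and URLs are placeholders.

```python
import os

# Langfuse's drop-in OpenAI client: it logs every call it makes to Langfuse
from langfuse.openai import openai

os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-..."   # from https://cloud.langfuse.com/
os.environ["LANGFUSE_SECRET_KEY"] = "sk-..."

client = openai.OpenAI(
    api_key="anything",                # provider keys live on the proxy, not the client
    base_url="http://localhost:4000",  # assumed LiteLLM Proxy address
)

response = client.chat.completions.create(
    model="gpt-4o",  # assumed model alias configured on the proxy
    messages=[{"role": "user", "content": "Hello from LiteLLM via the Langfuse wrapper"}],
)
print(response.choices[0].message.content)
```

Because the proxy normalizes every provider to the OpenAI schema, the same wrapper and trace format apply regardless of which backend actually serves the request.
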
@@ -55,8 +87,8 @@ response = litellm.completion(
 )
 ```
 
-## Advanced
-### Set Custom Generation names, pass metadata
+### Advanced
+#### Set Custom Generation Names, pass Metadata
 
 Pass `generation_name` in `metadata`
 

@@ -66,13 +98,13 @@ from litellm import completion
 import os
 
 # from https://cloud.langfuse.com/
-os.environ["LANGFUSE_PUBLIC_KEY"] = ""
-os.environ["LANGFUSE_SECRET_KEY"] = ""
+os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-..."
+os.environ["LANGFUSE_SECRET_KEY"] = "sk-..."
 
 
 # OpenAI and Cohere keys
 # You can use any of the litellm supported providers: https://docs.litellm.ai/docs/providers
-os.environ['OPENAI_API_KEY']=""
+os.environ['OPENAI_API_KEY']="sk-..."
 
 # set langfuse as a callback, litellm will send the data to langfuse
 litellm.success_callback = ["langfuse"]

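The completion call itself falls between the hunks of this code block; as a hedged sketch, passing a custom generation name only requires adding it to `metadata` (the name and message are illustrative values):

```python
from litellm import completion

# assumes the LANGFUSE_* / OPENAI_API_KEY variables and success_callback set above
response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi 👋"}],
    metadata={"generation_name": "my-custom-generation-name"},  # illustrative value
)
print(response)
```
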
@@ -94,7 +126,7 @@ print(response)
 
 ```
 
-### Set Custom Trace ID, Trace User ID, Trace Metadata, Trace Version, Trace Release and Tags
+#### Set Custom Trace ID, Trace User ID, Trace Metadata, Trace Version, Trace Release and Tags
 
 Pass `trace_id`, `trace_user_id`, `trace_metadata`, `trace_version`, `trace_release`, `tags` in `metadata`
 

@@ -105,10 +137,10 @@ from litellm import completion
 import os
 
 # from https://cloud.langfuse.com/
-os.environ["LANGFUSE_PUBLIC_KEY"] = ""
-os.environ["LANGFUSE_SECRET_KEY"] = ""
+os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-..."
+os.environ["LANGFUSE_SECRET_KEY"] = "sk-..."
 
-os.environ['OPENAI_API_KEY']=""
+os.environ['OPENAI_API_KEY']="sk-..."
 
 # set langfuse as a callback, litellm will send the data to langfuse
 litellm.success_callback = ["langfuse"]

@@ -167,9 +199,9 @@ curl --location --request POST 'http://0.0.0.0:4000/chat/completions' \
 ```
 
 
-### Trace & Generation Parameters
+#### Trace & Generation Parameters
 
-#### Trace Specific Parameters
+##### Trace Specific Parameters
 
 * `trace_id` - Identifier for the trace, must use `existing_trace_id` instead of `trace_id` if this is an existing trace, auto-generated by default
 * `trace_name` - Name of the trace, auto-generated by default

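A hedged sketch of how the trace-specific parameters listed above are passed, again through `metadata` on a `litellm.completion` call; all identifiers below are placeholders:

```python
import litellm

litellm.success_callback = ["langfuse"]  # assumes LANGFUSE_* and OPENAI_API_KEY are set

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi 👋"}],
    metadata={
        "trace_id": "my-trace-id",      # or "existing_trace_id" to attach to an existing trace
        "trace_user_id": "user-123",
        "trace_version": "1.0",
        "trace_release": "2024-09-01",
        "tags": ["docs-example"],
    },
)
```
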
@@ -216,10 +248,10 @@ from langchain.schema import HumanMessage
 import litellm
 
 # from https://cloud.langfuse.com/
-os.environ["LANGFUSE_PUBLIC_KEY"] = ""
-os.environ["LANGFUSE_SECRET_KEY"] = ""
+os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-..."
+os.environ["LANGFUSE_SECRET_KEY"] = "sk-..."
 
-os.environ['OPENAI_API_KEY']=""
+os.environ['OPENAI_API_KEY']="sk-..."
 
 # set langfuse as a callback, litellm will send the data to langfuse
 litellm.success_callback = ["langfuse"]

@@ -242,13 +274,13 @@ messages = [
 chat(messages)
 ```
 
-## Redacting Messages, Response Content from Langfuse Logging
+### Redacting Messages, Response Content from Langfuse Logging
 
-### Redact Messages and Responses from all Langfuse Logging
+#### Redact Messages and Responses from all Langfuse Logging
 
 Set `litellm.turn_off_message_logging=True` This will prevent the messages and responses from being logged to langfuse, but request metadata will still be logged.
 
-### Redact Messages and Responses from specific Langfuse Logging
+#### Redact Messages and Responses from specific Langfuse Logging
 
 In the metadata typically passed for text completion or embedding calls you can set specific keys to mask the messages and responses for this call.
 

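A minimal sketch combining the two redaction options described above; the global flag hides message content everywhere, while the per-call `mask_input` / `mask_output` metadata keys redact a single request (model and message are placeholders):

```python
import litellm

litellm.success_callback = ["langfuse"]

# Option A: redact messages/responses for every Langfuse log (request metadata still logged)
litellm.turn_off_message_logging = True

# Option B: redact only this call via metadata keys
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "some sensitive text"}],
    metadata={"mask_input": True, "mask_output": True},
)
```
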
@@ -258,13 +290,14 @@ Setting `mask_output` to `True` will make the output from being logged for this
 
 Be aware that if you are continuing an existing trace, and you set `update_trace_keys` to include either `input` or `output` and you set the corresponding `mask_input` or `mask_output`, then that trace will have its existing input and/or output replaced with a redacted message.
 
-## **Use with LiteLLM Proxy (LLM Gateway) **
+### Use with LiteLLM Proxy (LLM Gateway)
+
 👉 [**Follow this link to start sending logs to langfuse with LiteLLM Proxy server**](../proxy/logging)
 
 ## Troubleshooting & Errors
 ### Data not getting logged to Langfuse ?
 - Ensure you're on the latest version of langfuse `pip install langfuse -U`. The latest version allows litellm to log JSON input/outputs to langfuse
 - Follow [this checklist](https://langfuse.com/faq/all/missing-traces) if you don't see any traces in langfuse.
 
 ## Support & Talk to Founders

BIN  new image (67 KiB), binary file not shown
BIN  docs/my-website/img/langfuse-litellm-ui.png  (new file, 157 KiB), binary file not shown

litellm/llms/OpenAI/common_utils.py (new file, 45 lines)
@@ -0,0 +1,45 @@
+"""
+Common helpers / utils across al OpenAI endpoints
+"""
+
+import json
+from typing import Any, Dict, List
+
+import openai
+
+
+####### Error Handling Utils for OpenAI API #######################
+###################################################################
+def drop_params_from_unprocessable_entity_error(
+    e: openai.UnprocessableEntityError, data: Dict[str, Any]
+) -> Dict[str, Any]:
+    """
+    Helper function to read OpenAI UnprocessableEntityError and drop the params that raised an error from the error message.
+
+    Args:
+        e (UnprocessableEntityError): The UnprocessableEntityError exception
+        data (Dict[str, Any]): The original data dictionary containing all parameters
+
+    Returns:
+        Dict[str, Any]: A new dictionary with invalid parameters removed
+    """
+    invalid_params: List[str] = []
+    if e.body is not None and isinstance(e.body, dict) and e.body.get("message"):
+        message = e.body.get("message", {})
+        if isinstance(message, str):
+            try:
+                message = json.loads(message)
+            except json.JSONDecodeError:
+                message = {"detail": message}
+        detail = message.get("detail")
+        if isinstance(detail, List) and len(detail) > 0 and isinstance(detail[0], dict):
+            for error_dict in detail:
+                if (
+                    error_dict.get("loc")
+                    and isinstance(error_dict.get("loc"), list)
+                    and len(error_dict.get("loc")) == 2
+                ):
+                    invalid_params.append(error_dict["loc"][1])
+
+    new_data = {k: v for k, v in data.items() if k not in invalid_params}
+    return new_data

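To make the refactor in the following hunks easier to follow, here is a hedged usage sketch of the helper added above; the client, request payload, and model name are placeholders, and the catch-and-retry shape mirrors how the later hunks in this commit call it, not LiteLLM's exact call path:

```python
import openai

from litellm.llms.OpenAI.common_utils import (
    drop_params_from_unprocessable_entity_error,
)

client = openai.OpenAI()  # assumes OPENAI_API_KEY is set
data = {
    "model": "gpt-4o-mini",
    "messages": [{"role": "user", "content": "hello"}],
    "response_format": {"type": "json_object"},  # example param a backend might reject
}

try:
    response = client.chat.completions.create(**data)
except openai.UnprocessableEntityError as e:
    # drop only the params the 422 error body flags as invalid, then retry once
    data = drop_params_from_unprocessable_entity_error(e, data)
    response = client.chat.completions.create(**data)
```
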
@@ -31,6 +31,7 @@ from litellm.utils import (
 from ...types.llms.openai import *
 from ..base import BaseLLM
 from ..prompt_templates.factory import custom_prompt, prompt_factory
+from .common_utils import drop_params_from_unprocessable_entity_error
 
 
 class OpenAIError(Exception):

@@ -831,27 +832,9 @@ class OpenAIChatCompletion(BaseLLM):
 except openai.UnprocessableEntityError as e:
     ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
     if litellm.drop_params is True or drop_params is True:
-        invalid_params: List[str] = []
-        if e.body is not None and isinstance(e.body, dict) and e.body.get("detail"):  # type: ignore
-            detail = e.body.get("detail")  # type: ignore
-            if (
-                isinstance(detail, List)
-                and len(detail) > 0
-                and isinstance(detail[0], dict)
-            ):
-                for error_dict in detail:
-                    if (
-                        error_dict.get("loc")
-                        and isinstance(error_dict.get("loc"), list)
-                        and len(error_dict.get("loc")) == 2
-                    ):
-                        invalid_params.append(error_dict["loc"][1])
-
-        new_data = {}
-        for k, v in optional_params.items():
-            if k not in invalid_params:
-                new_data[k] = v
-        optional_params = new_data
+        optional_params = drop_params_from_unprocessable_entity_error(
+            e, optional_params
+        )
     else:
         raise e
     # e.message

@@ -967,27 +950,7 @@ class OpenAIChatCompletion(BaseLLM):
 except openai.UnprocessableEntityError as e:
     ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
     if litellm.drop_params is True or drop_params is True:
-        invalid_params: List[str] = []
-        if e.body is not None and isinstance(e.body, dict) and e.body.get("detail"):  # type: ignore
-            detail = e.body.get("detail")  # type: ignore
-            if (
-                isinstance(detail, List)
-                and len(detail) > 0
-                and isinstance(detail[0], dict)
-            ):
-                for error_dict in detail:
-                    if (
-                        error_dict.get("loc")
-                        and isinstance(error_dict.get("loc"), list)
-                        and len(error_dict.get("loc")) == 2
-                    ):
-                        invalid_params.append(error_dict["loc"][1])
-
-        new_data = {}
-        for k, v in data.items():
-            if k not in invalid_params:
-                new_data[k] = v
-        data = new_data
+        data = drop_params_from_unprocessable_entity_error(e, data)
     else:
         raise e
     # e.message

@@ -21,7 +21,7 @@ prometheus_client==0.20.0 # for /metrics endpoint on proxy
 orjson==3.9.15 # fast /embedding responses
 apscheduler==3.10.4 # for resetting budget in background
 fastapi-sso==0.10.0 # admin UI, SSO
-pyjwt[crypto]==2.8.0
+pyjwt[crypto]==2.9.0
 python-multipart==0.0.9 # admin UI
 Pillow==10.3.0
 azure-ai-contentsafety==1.0.0 # for azure content safety