Update MLflow callback and documentation (#7809)

* Update MLflow tracer

Signed-off-by: B-Step62 <yuki.watanabe@databricks.com>

* doc update

Signed-off-by: B-Step62 <yuki.watanabe@databricks.com>

* doc update

Signed-off-by: B-Step62 <yuki.watanabe@databricks.com>

* image rename

Signed-off-by: B-Step62 <yuki.watanabe@databricks.com>

---------

Signed-off-by: B-Step62 <yuki.watanabe@databricks.com>
Yuki Watanabe 2025-01-22 13:56:48 +09:00 committed by GitHub
parent 4978669273
commit 3f053fc99c
11 changed files with 180 additions and 22 deletions

View file

@@ -175,12 +175,12 @@ for part in response:
## Logging Observability ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, DynamoDB, s3 Buckets, Helicone, Promptlayer, Traceloop, Athina, Slack, MLflow
LiteLLM exposes pre-defined callbacks to send data to Lunary, MLflow, Langfuse, DynamoDB, s3 Buckets, Helicone, Promptlayer, Traceloop, Athina, Slack
```python
from litellm import completion
## set env variables for logging tools
## set env variables for logging tools (when using MLflow, no API key setup is required)
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
os.environ["HELICONE_API_KEY"] = "your-helicone-auth-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
@@ -190,7 +190,7 @@ os.environ["ATHINA_API_KEY"] = "your-athina-api-key"
os.environ["OPENAI_API_KEY"] = "your-openai-api-key"
# set callbacks
litellm.success_callback = ["lunary", "langfuse", "athina", "helicone"] # log input/output to lunary, langfuse, supabase, athina, helicone etc
litellm.success_callback = ["lunary", "mlflow", "langfuse", "athina", "helicone"] # log input/output to lunary, langfuse, supabase, athina, helicone etc
#openai call
response = completion(model="anthropic/claude-3-sonnet-20240229", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])

View file

@@ -80,12 +80,12 @@ except OpenAIError as e:
## Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack
LiteLLM exposes pre-defined callbacks to send data to MLflow, Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack
```python
from litellm import completion
## set env variables for logging tools
## set env variables for logging tools (API key setup is not required when using MLflow)
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key" # get your public key at https://app.lunary.ai/settings
os.environ["HELICONE_API_KEY"] = "your-helicone-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
@@ -94,7 +94,7 @@ os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["OPENAI_API_KEY"] = "your-openai-api-key"
# set callbacks
litellm.success_callback = ["lunary", "langfuse", "helicone"] # log input/output to langfuse, lunary, supabase, helicone
litellm.success_callback = ["lunary", "mlflow", "langfuse", "helicone"] # log input/output to MLflow, langfuse, lunary, helicone
#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])

View file

@@ -393,12 +393,12 @@ except OpenAIError as e:
```
### Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack
LiteLLM exposes pre-defined callbacks to send data to Lunary, MLflow, Langfuse, Helicone, Promptlayer, Traceloop, Slack
```python
from litellm import completion
## set env variables for logging tools
## set env variables for logging tools (API key setup is not required when using MLflow)
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key" # get your public key at https://app.lunary.ai/settings
os.environ["HELICONE_API_KEY"] = "your-helicone-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
@@ -407,7 +407,7 @@ os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["OPENAI_API_KEY"] = "your-openai-api-key"
# set callbacks
litellm.success_callback = ["lunary", "langfuse", "helicone"] # log input/output to lunary, langfuse, supabase, helicone
litellm.success_callback = ["lunary", "mlflow", "langfuse", "helicone"] # log input/output to lunary, mlflow, langfuse, helicone
#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])

View file

@@ -111,6 +111,30 @@ chat.invoke(messages)
</TabItem>
</Tabs>
## Use Langchain ChatLiteLLM with MLflow
MLflow provides an open-source observability solution for ChatLiteLLM.
To enable the integration, simply call `mlflow.litellm.autolog()` in your code before invoking the model. No other setup is necessary.
```python
import mlflow
mlflow.litellm.autolog()
```
Once auto-tracing is enabled, you can invoke `ChatLiteLLM` and see the recorded traces in MLflow.
```python
import os
from langchain.chat_models import ChatLiteLLM
os.environ['OPENAI_API_KEY']="sk-..."
chat = ChatLiteLLM(model="gpt-4o-mini")
chat.invoke("Hi!")
```
## Use Langchain ChatLiteLLM with Lunary
```python
import os

View file

@@ -1,4 +1,6 @@
# MLflow
import Image from '@theme/IdealImage';
# 🔁 MLflow - OSS LLM Observability and Evaluation
## What is MLflow?
@@ -18,7 +20,7 @@ Install MLflow:
pip install mlflow
```
To enable LiteLLM tracing:
To enable MLflow auto-tracing for LiteLLM:
```python
import mlflow
@@ -29,9 +31,9 @@ mlflow.litellm.autolog()
# litellm.callbacks = ["mlflow"]
```
Since MLflow is open-source, no sign-up or API key is needed to log traces!
Since MLflow is open-source and free, **no sign-up or API key is needed to log traces!**
```
```python
import litellm
import os
@@ -53,6 +55,63 @@ Open the MLflow UI and go to the `Traces` tab to view logged traces:
mlflow ui
```
## Tracing Tool Calls
The MLflow integration with LiteLLM supports tracking tool calls in addition to messages.
```python
import litellm
import mlflow

# Enable MLflow auto-tracing for LiteLLM
mlflow.litellm.autolog()
# Define the tool function
def get_weather(location: str) -> str:
    if location == "Tokyo":
        return "sunny"
    elif location == "Paris":
        return "rainy"
    return "unknown"
# Define function spec
get_weather_tool = {
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather in a given location",
"parameters": {
"properties": {
"location": {
"description": "The city and state, e.g., San Francisco, CA",
"type": "string",
},
},
"required": ["location"],
"type": "object",
},
},
}
# Call LiteLLM as usual
response = litellm.completion(
model="gpt-4o-mini",
messages=[
{"role": "user", "content": "What's the weather like in Paris today?"}
],
tools=[get_weather_tool]
)
```
<Image img={require('../../img/mlflow_tool_calling_tracing.png')} />
## Evaluation
The MLflow LiteLLM integration allows you to run qualitative assessments against your LLM to evaluate and/or monitor your GenAI application.
Visit the [Evaluate LLMs Tutorial](../tutorials/eval_suites.md) for complete guidance on how to run an evaluation suite with LiteLLM and MLflow.
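For example, here is a minimal sketch of running `mlflow.evaluate()` over a LiteLLM-backed function (the `predict` wrapper, sample data, and model name below are illustrative assumptions, not part of the tutorial):
```python
import litellm
import mlflow
import pandas as pd

# Illustrative wrapper: mlflow.evaluate() can score a plain Python function
# that receives a DataFrame of inputs and returns a list of predictions.
def predict(inputs: pd.DataFrame) -> list:
    answers = []
    for question in inputs["inputs"]:
        response = litellm.completion(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": question}],
        )
        answers.append(response.choices[0].message.content)
    return answers

# Hypothetical evaluation data with ground-truth answers
eval_data = pd.DataFrame({
    "inputs": ["What is MLflow?", "What is LiteLLM?"],
    "ground_truth": [
        "MLflow is an open-source platform for the ML lifecycle.",
        "LiteLLM is a library to call 100+ LLM APIs in the OpenAI format.",
    ],
})

with mlflow.start_run():
    results = mlflow.evaluate(
        model=predict,
        data=eval_data,
        targets="ground_truth",
        model_type="question-answering",
    )
    print(results.metrics)
```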
## Exporting Traces to OpenTelemetry collectors
MLflow traces are compatible with OpenTelemetry. You can export traces to any OpenTelemetry collector (e.g., Jaeger, Zipkin, Datadog, New Relic) by setting the endpoint URL in the environment variables.
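A minimal sketch of that configuration (the endpoint URL is an assumed local OTLP/HTTP collector address; `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` is the standard OpenTelemetry variable name):
```python
import os

# Assumed local collector (e.g., an OpenTelemetry Collector or Jaeger instance)
# listening on the default OTLP/HTTP port; set this before enabling tracing.
os.environ["OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"] = "http://localhost:4318/v1/traces"
```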
@@ -75,7 +134,7 @@ import litellm
import mlflow
from mlflow.entities import SpanType
# Enable LiteLLM tracing
# Enable MLflow auto-tracing for LiteLLM
mlflow.litellm.autolog()

View file

@@ -30,7 +30,7 @@ import TabItem from '@theme/TabItem';
6. [**litellm.completion() / litellm.embedding()**:](../index#litellm-python-sdk) The litellm Python SDK is used to call the LLM in the OpenAI API format (Translation and parameter mapping)
7. **Post-Request Processing**: After the response is sent back to the client, the following **asynchronous** tasks are performed:
- [Logging to Lunary, LangFuse or other logging destinations](./logging)
- [Logging to Lunary, MLflow, LangFuse or other logging destinations](./logging)
- The [MaxParallelRequestsHandler](https://github.com/BerriAI/litellm/blob/main/litellm/proxy/hooks/parallel_request_limiter.py) updates the rpm/tpm usage for the
- Global Server Rate Limit
- Virtual Key Rate Limit

View file

@@ -3,6 +3,7 @@
Log Proxy input, output, and exceptions using:
- Lunary
- MLflow
- Langfuse
- OpenTelemetry
- GCS, s3, Azure (Blob) Buckets
@@ -232,6 +233,58 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
}'
```
## MLflow
### Step 1: Install dependencies
Install the dependencies.
```shell
pip install litellm mlflow
```
### Step 2: Create a `config.yaml` with `mlflow` callback
```yaml
model_list:
- model_name: "*"
litellm_params:
model: "*"
litellm_settings:
success_callback: ["mlflow"]
failure_callback: ["mlflow"]
```
### Step 3: Start the LiteLLM proxy
```shell
litellm --config config.yaml
```
### Step 4: Make a request
```shell
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-H 'Content-Type: application/json' \
-d '{
"model": "gpt-4o-mini",
"messages": [
{
"role": "user",
"content": "What is the capital of France?"
}
]
}'
```
### Step 5: Review traces
Run the following command to start the MLflow UI and review the recorded traces.
```shell
mlflow ui
```
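Alternatively, here is a sketch of fetching the traces programmatically instead of through the UI; `mlflow.search_traces()` returns them as a pandas DataFrame in recent MLflow versions (this step is an optional addition, not part of the original walkthrough):
```python
import mlflow

# Retrieve recorded traces as a pandas DataFrame (recent MLflow versions)
traces = mlflow.search_traces()
print(traces.head())
```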
## Langfuse
We will use the `--config` to set `litellm.success_callback = ["langfuse"]`; this will log all successful LLM calls to Langfuse. Make sure to set `LANGFUSE_PUBLIC_KEY` and `LANGFUSE_SECRET_KEY` in your environment.

View file

@@ -2,9 +2,9 @@ import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Evaluate LLMs - ML Flow Evals, Auto Eval
# Evaluate LLMs - MLflow Evals, Auto Eval
## Using LiteLLM with ML Flow
## Using LiteLLM with MLflow
MLflow provides an API, `mlflow.evaluate()`, to help evaluate your LLMs: https://mlflow.org/docs/latest/llms/llm-evaluate/index.html
### Pre Requisites
@@ -153,7 +153,7 @@ $ litellm --model command-nightly
</Tabs>
### Step 2: Run ML Flow
### Step 2: Run MLflow
Before running the eval, we will set `openai.api_base` to the litellm proxy from Step 1.
```python
@@ -209,7 +209,7 @@ with mlflow.start_run() as run:
```
### ML Flow Output
### MLflow Output
```
{'toxicity/v1/mean': 0.00014476531214313582, 'toxicity/v1/variance': 2.5759661361262862e-12, 'toxicity/v1/p90': 0.00014604929747292773, 'toxicity/v1/ratio': 0.0, 'exact_match/v1': 0.0}
Downloading artifacts: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1890.18it/s]

Binary image file added (85 KiB); preview not shown.

View file

@@ -331,12 +331,12 @@ except OpenAIError as e:
```
### Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack
LiteLLM exposes pre-defined callbacks to send data to MLflow, Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack
```python
from litellm import completion
## set env variables for logging tools
## set env variables for logging tools (API key setup is not required when using MLflow)
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key" # get your key at https://app.lunary.ai/settings
os.environ["HELICONE_API_KEY"] = "your-helicone-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
@@ -345,7 +345,7 @@ os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["OPENAI_API_KEY"] = "your-openai-api-key"
# set callbacks
litellm.success_callback = ["lunary", "langfuse", "helicone"] # log input/output to lunary, langfuse, supabase, helicone
litellm.success_callback = ["lunary", "mlflow", "langfuse", "helicone"] # log input/output to lunary, mlflow, langfuse, helicone
#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])

View file

@@ -36,6 +36,7 @@ class MlflowLogger(CustomLogger):
            else:
                span = self._start_span_or_trace(kwargs, start_time)
                end_time_ns = int(end_time.timestamp() * 1e9)
                self._extract_and_set_chat_attributes(span, kwargs, response_obj)
                self._end_span_or_trace(
                    span=span,
                    outputs=response_obj,
@@ -45,6 +46,21 @@ class MlflowLogger(CustomLogger):
        except Exception:
            verbose_logger.debug("MLflow Logging Error", stack_info=True)
    def _extract_and_set_chat_attributes(self, span, kwargs, response_obj):
        try:
            from mlflow.tracing.utils import set_span_chat_messages, set_span_chat_tools
        except ImportError:
            return

        inputs = self._construct_input(kwargs)
        input_messages = inputs.get("messages", [])
        output_messages = [c.message.model_dump(exclude_none=True)
                           for c in getattr(response_obj, "choices", [])]
        if messages := [*input_messages, *output_messages]:
            set_span_chat_messages(span, messages)
        if tools := inputs.get("tools"):
            set_span_chat_tools(span, tools)
    def log_failure_event(self, kwargs, response_obj, start_time, end_time):
        self._handle_failure(kwargs, response_obj, start_time, end_time)
@@ -67,6 +83,7 @@ class MlflowLogger(CustomLogger):
            if exception := kwargs.get("exception"):
                span.add_event(SpanEvent.from_exception(exception))  # type: ignore

            self._extract_and_set_chat_attributes(span, kwargs, response_obj)
            self._end_span_or_trace(
                span=span,
                outputs=response_obj,
@@ -107,6 +124,8 @@ class MlflowLogger(CustomLogger):
        # has complete_streaming_response that gathers the full response.
        if final_response := kwargs.get("complete_streaming_response"):
            end_time_ns = int(end_time.timestamp() * 1e9)

            self._extract_and_set_chat_attributes(span, kwargs, final_response)
            self._end_span_or_trace(
                span=span,
                outputs=final_response,
@@ -135,6 +154,9 @@ class MlflowLogger(CustomLogger):
    def _construct_input(self, kwargs):
        """Construct span inputs with optional parameters"""
        inputs = {"messages": kwargs.get("messages")}

        if tools := kwargs.get("tools"):
            inputs["tools"] = tools

        for key in ["functions", "tools", "stream", "tool_choice", "user"]:
            if value := kwargs.get("optional_params", {}).pop(key, None):
                inputs[key] = value