Update MLflow callback and documentation (#7809)
* Update MLflow tracer
* doc update
* doc update
* image rename

---------

Signed-off-by: B-Step62 <yuki.watanabe@databricks.com>
This commit is contained in:
parent
4978669273
commit
3f053fc99c
11 changed files with 180 additions and 22 deletions
@@ -175,12 +175,12 @@ for part in response:

## Logging Observability ([Docs](https://docs.litellm.ai/docs/observability/callbacks))

LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, DynamoDB, s3 Buckets, Helicone, Promptlayer, Traceloop, Athina, Slack, MLflow
LiteLLM exposes pre defined callbacks to send data to Lunary, MLflow, Langfuse, DynamoDB, s3 Buckets, Helicone, Promptlayer, Traceloop, Athina, Slack

```python
from litellm import completion

## set env variables for logging tools
## set env variables for logging tools (when using MLflow, no API key setup is required)
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
os.environ["HELICONE_API_KEY"] = "your-helicone-auth-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
@@ -190,7 +190,7 @@ os.environ["ATHINA_API_KEY"] = "your-athina-api-key"
os.environ["OPENAI_API_KEY"]

# set callbacks
litellm.success_callback = ["lunary", "langfuse", "athina", "helicone"] # log input/output to lunary, langfuse, supabase, athina, helicone etc
litellm.success_callback = ["lunary", "mlflow", "langfuse", "athina", "helicone"] # log input/output to lunary, mlflow, langfuse, athina, helicone etc

#openai call
response = completion(model="anthropic/claude-3-sonnet-20240229", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
@@ -80,12 +80,12 @@ except OpenAIError as e:

## Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))

LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack
LiteLLM exposes pre defined callbacks to send data to MLflow, Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack

```python
from litellm import completion

## set env variables for logging tools
## set env variables for logging tools (API key setup is not required when using MLflow)
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key" # get your public key at https://app.lunary.ai/settings
os.environ["HELICONE_API_KEY"] = "your-helicone-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
@@ -94,7 +94,7 @@ os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["OPENAI_API_KEY"]

# set callbacks
litellm.success_callback = ["lunary", "langfuse", "helicone"] # log input/output to langfuse, lunary, supabase, helicone
litellm.success_callback = ["lunary", "mlflow", "langfuse", "helicone"] # log input/output to MLflow, langfuse, lunary, helicone

#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
@@ -393,12 +393,12 @@ except OpenAIError as e:
```

### Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack
LiteLLM exposes pre defined callbacks to send data to Lunary, MLflow, Langfuse, Helicone, Promptlayer, Traceloop, Slack

```python
from litellm import completion

## set env variables for logging tools
## set env variables for logging tools (API key setup is not required when using MLflow)
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key" # get your public key at https://app.lunary.ai/settings
os.environ["HELICONE_API_KEY"] = "your-helicone-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
@@ -407,7 +407,7 @@ os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["OPENAI_API_KEY"]

# set callbacks
litellm.success_callback = ["lunary", "langfuse", "helicone"] # log input/output to lunary, langfuse, supabase, helicone
litellm.success_callback = ["lunary", "mlflow", "langfuse", "helicone"] # log input/output to lunary, mlflow, langfuse, helicone

#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
@@ -111,6 +111,30 @@ chat.invoke(messages)
</TabItem>
</Tabs>

## Use Langchain ChatLiteLLM with MLflow

MLflow provides an open-source observability solution for ChatLiteLLM.

To enable the integration, simply call `mlflow.litellm.autolog()` before invoking `ChatLiteLLM` in your code. No other setup is necessary.

```python
import mlflow

mlflow.litellm.autolog()
```

Once auto-tracing is enabled, you can invoke `ChatLiteLLM` and see recorded traces in MLflow.

```python
import os
from langchain.chat_models import ChatLiteLLM

os.environ['OPENAI_API_KEY']="sk-..."

chat = ChatLiteLLM(model="gpt-4o-mini")
chat.invoke("Hi!")
```

## Use Langchain ChatLiteLLM with Lunary
```python
import os
@@ -1,4 +1,6 @@
# MLflow
import Image from '@theme/IdealImage';

# 🔁 MLflow - OSS LLM Observability and Evaluation

## What is MLflow?
@@ -18,7 +20,7 @@ Install MLflow:
pip install mlflow
```

To enable LiteLLM tracing:
To enable MLflow auto tracing for LiteLLM:

```python
import mlflow
@@ -29,9 +31,9 @@ mlflow.litellm.autolog()
# litellm.callbacks = ["mlflow"]
```

Since MLflow is open-source, no sign-up or API key is needed to log traces!
Since MLflow is open-source and free, **no sign-up or API key is needed to log traces!**

```
```python
import litellm
import os
@@ -53,6 +55,63 @@ Open the MLflow UI and go to the `Traces` tab to view logged traces:
mlflow ui
```
## Tracing Tool Calls

The MLflow integration with LiteLLM supports tracking tool calls in addition to messages.

```python
import litellm
import mlflow

# Enable MLflow auto-tracing for LiteLLM
mlflow.litellm.autolog()

# Define the tool function.
def get_weather(location: str) -> str:
    if location == "Tokyo":
        return "sunny"
    elif location == "Paris":
        return "rainy"
    return "unknown"

# Define function spec
get_weather_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "properties": {
                "location": {
                    "description": "The city and state, e.g., San Francisco, CA",
                    "type": "string",
                },
            },
            "required": ["location"],
            "type": "object",
        },
    },
}

# Call LiteLLM as usual
response = litellm.completion(
    model="gpt-4o-mini",
    messages=[
        {"role": "user", "content": "What's the weather like in Paris today?"}
    ],
    tools=[get_weather_tool]
)
```

<Image img={require('../../img/mlflow_tool_calling_tracing.png')} />
## Evaluation

The MLflow LiteLLM integration allows you to run qualitative assessments against LLMs to evaluate and/or monitor your GenAI application.

Visit the [Evaluate LLMs Tutorial](../tutorials/eval_suites.md) for complete guidance on how to run an evaluation suite with LiteLLM and MLflow.
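Below is a minimal, illustrative sketch of what such an assessment can look like with `mlflow.evaluate()`; the model name, sample questions, and `model_type` are placeholders, and the tutorial linked above remains the reference for the full workflow.

```python
import litellm
import mlflow
import pandas as pd

# Placeholder evaluation data (assumed questions).
eval_data = pd.DataFrame({"inputs": ["What is MLflow?", "What is LiteLLM?"]})

# A callable that maps a DataFrame of inputs to completions via LiteLLM.
def answer(df: pd.DataFrame):
    return [
        litellm.completion(
            model="gpt-4o-mini",  # placeholder model
            messages=[{"role": "user", "content": question}],
        ).choices[0].message.content
        for question in df["inputs"]
    ]

with mlflow.start_run():
    results = mlflow.evaluate(answer, eval_data, model_type="text")
    print(results.metrics)
```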
## Exporting Traces to OpenTelemetry collectors

MLflow traces are compatible with OpenTelemetry. You can export traces to any OpenTelemetry collector (e.g., Jaeger, Zipkin, Datadog, New Relic) by setting the endpoint URL in the environment variables.
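For example, a minimal sketch using the standard OpenTelemetry endpoint variable (assuming an OTLP-compatible collector such as Jaeger is already listening on `localhost:4317`; the exact URL and any extra exporter packages depend on your setup):

```python
import os

# Standard OpenTelemetry endpoint variable; adjust the URL to your collector.
os.environ["OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"] = "http://localhost:4317/v1/traces"

import mlflow

mlflow.litellm.autolog()  # subsequent LiteLLM traces are exported to the configured collector
```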
@@ -75,7 +134,7 @@ import litellm
import mlflow
from mlflow.entities import SpanType

# Enable LiteLLM tracing
# Enable MLflow auto-tracing for LiteLLM
mlflow.litellm.autolog()
@@ -30,7 +30,7 @@ import TabItem from '@theme/TabItem';
6. [**litellm.completion() / litellm.embedding()**:](../index#litellm-python-sdk) The litellm Python SDK is used to call the LLM in the OpenAI API format (Translation and parameter mapping)

7. **Post-Request Processing**: After the response is sent back to the client, the following **asynchronous** tasks are performed:
   - [Logging to Lunary, LangFuse or other logging destinations](./logging)
   - [Logging to Lunary, MLflow, LangFuse or other logging destinations](./logging)
   - The [MaxParallelRequestsHandler](https://github.com/BerriAI/litellm/blob/main/litellm/proxy/hooks/parallel_request_limiter.py) updates the rpm/tpm usage for the
     - Global Server Rate Limit
     - Virtual Key Rate Limit
@@ -3,6 +3,7 @@
Log Proxy input, output, and exceptions using:

- Lunary
- MLflow
- Langfuse
- OpenTelemetry
- GCS, s3, Azure (Blob) Buckets
@@ -232,6 +233,58 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
}'
```

## MLflow

### Step 1: Install dependencies

Install the dependencies.

```shell
pip install litellm mlflow
```

### Step 2: Create a `config.yaml` with `mlflow` callback

```yaml
model_list:
  - model_name: "*"
    litellm_params:
      model: "*"

litellm_settings:
  success_callback: ["mlflow"]
  failure_callback: ["mlflow"]
```

### Step 3: Start the LiteLLM proxy

```shell
litellm --config config.yaml
```

### Step 4: Make a request

```shell
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
    -H 'Content-Type: application/json' \
    -d '{
    "model": "gpt-4o-mini",
    "messages": [
        {
            "role": "user",
            "content": "What is the capital of France?"
        }
    ]
}'
```

### Step 5: Review traces

Run the following command to start the MLflow UI and review the recorded traces.

```shell
mlflow ui
```

## Langfuse

We will use the `--config` to set `litellm.success_callback = ["langfuse"]`; this will log all successful LLM calls to Langfuse. Make sure to set `LANGFUSE_PUBLIC_KEY` and `LANGFUSE_SECRET_KEY` in your environment.
@@ -2,9 +2,9 @@ import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Evaluate LLMs - ML Flow Evals, Auto Eval
# Evaluate LLMs - MLflow Evals, Auto Eval

## Using LiteLLM with ML Flow
## Using LiteLLM with MLflow
MLflow provides an API `mlflow.evaluate()` to help evaluate your LLMs https://mlflow.org/docs/latest/llms/llm-evaluate/index.html

### Pre Requisites
@@ -153,7 +153,7 @@ $ litellm --model command-nightly
</Tabs>

### Step 2: Run ML Flow
### Step 2: Run MLflow
Before running the eval we will set `openai.api_base` to the litellm proxy from Step 1

```python
@@ -209,7 +209,7 @@ with mlflow.start_run() as run:
```

### ML Flow Output
### MLflow Output
```
{'toxicity/v1/mean': 0.00014476531214313582, 'toxicity/v1/variance': 2.5759661361262862e-12, 'toxicity/v1/p90': 0.00014604929747292773, 'toxicity/v1/ratio': 0.0, 'exact_match/v1': 0.0}
Downloading artifacts: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1890.18it/s]
BIN docs/my-website/img/mlflow_tool_calling_tracing.png (new file, 85 KiB; binary file not shown)
@@ -331,12 +331,12 @@ except OpenAIError as e:
```

### Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack
LiteLLM exposes pre defined callbacks to send data to MLflow, Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack

```python
from litellm import completion

## set env variables for logging tools
## set env variables for logging tools (API key setup is not required when using MLflow)
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key" # get your key at https://app.lunary.ai/settings
os.environ["HELICONE_API_KEY"] = "your-helicone-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
@@ -345,7 +345,7 @@ os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["OPENAI_API_KEY"]

# set callbacks
litellm.success_callback = ["lunary", "langfuse", "helicone"] # log input/output to lunary, langfuse, supabase, helicone
litellm.success_callback = ["lunary", "mlflow", "langfuse", "helicone"] # log input/output to lunary, mlflow, langfuse, helicone

#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
@@ -36,6 +36,7 @@ class MlflowLogger(CustomLogger):
        else:
            span = self._start_span_or_trace(kwargs, start_time)
            end_time_ns = int(end_time.timestamp() * 1e9)
            self._extract_and_set_chat_attributes(span, kwargs, response_obj)
            self._end_span_or_trace(
                span=span,
                outputs=response_obj,
@@ -45,6 +46,21 @@ class MlflowLogger(CustomLogger):
        except Exception:
            verbose_logger.debug("MLflow Logging Error", stack_info=True)

    def _extract_and_set_chat_attributes(self, span, kwargs, response_obj):
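        """Attach chat messages and tool definitions from the request/response to the MLflow span, when MLflow's chat tracing helpers are available."""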
        try:
            from mlflow.tracing.utils import set_span_chat_messages, set_span_chat_tools
        except ImportError:
            return

        inputs = self._construct_input(kwargs)
        input_messages = inputs.get("messages", [])
        output_messages = [c.message.model_dump(exclude_none=True)
                           for c in getattr(response_obj, "choices", [])]
        if messages := [*input_messages, *output_messages]:
            set_span_chat_messages(span, messages)
        if tools := inputs.get("tools"):
            set_span_chat_tools(span, tools)

    def log_failure_event(self, kwargs, response_obj, start_time, end_time):
        self._handle_failure(kwargs, response_obj, start_time, end_time)
@@ -67,6 +83,7 @@ class MlflowLogger(CustomLogger):
            if exception := kwargs.get("exception"):
                span.add_event(SpanEvent.from_exception(exception)) # type: ignore

            self._extract_and_set_chat_attributes(span, kwargs, response_obj)
            self._end_span_or_trace(
                span=span,
                outputs=response_obj,
@@ -107,6 +124,8 @@ class MlflowLogger(CustomLogger):
        # has complete_streaming_response that gathers the full response.
        if final_response := kwargs.get("complete_streaming_response"):
            end_time_ns = int(end_time.timestamp() * 1e9)

            self._extract_and_set_chat_attributes(span, kwargs, final_response)
            self._end_span_or_trace(
                span=span,
                outputs=final_response,
@@ -135,6 +154,9 @@ class MlflowLogger(CustomLogger):
    def _construct_input(self, kwargs):
        """Construct span inputs with optional parameters"""
        inputs = {"messages": kwargs.get("messages")}
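        # Record tool definitions (when provided) so they appear in the span inputs.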
        if tools := kwargs.get("tools"):
            inputs["tools"] = tools

        for key in ["functions", "tools", "stream", "tool_choice", "user"]:
            if value := kwargs.get("optional_params", {}).pop(key, None):
                inputs[key] = value