Update MLflow callback and documentation (#7809)
* Update MLflow tracer
* doc update
* doc update
* image rename

---------

Signed-off-by: B-Step62 <yuki.watanabe@databricks.com>
This commit is contained in:
parent
4978669273
commit
3f053fc99c
11 changed files with 180 additions and 22 deletions
@@ -175,12 +175,12 @@ for part in response:

## Logging Observability ([Docs](https://docs.litellm.ai/docs/observability/callbacks))

LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, DynamoDB, s3 Buckets, Helicone, Promptlayer, Traceloop, Athina, Slack, MLflow
LiteLLM exposes pre defined callbacks to send data to Lunary, MLflow, Langfuse, DynamoDB, s3 Buckets, Helicone, Promptlayer, Traceloop, Athina, Slack

```python
from litellm import completion

## set env variables for logging tools
## set env variables for logging tools (when using MLflow, no API key setup is required)
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key"
os.environ["HELICONE_API_KEY"] = "your-helicone-auth-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
@@ -190,7 +190,7 @@ os.environ["ATHINA_API_KEY"] = "your-athina-api-key"
os.environ["OPENAI_API_KEY"]

# set callbacks
litellm.success_callback = ["lunary", "langfuse", "athina", "helicone"] # log input/output to lunary, langfuse, supabase, athina, helicone etc
litellm.success_callback = ["lunary", "mlflow", "langfuse", "athina", "helicone"] # log input/output to lunary, mlflow, langfuse, athina, helicone etc

#openai call
response = completion(model="anthropic/claude-3-sonnet-20240229", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
@@ -80,12 +80,12 @@ except OpenAIError as e:

## Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))

LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack
LiteLLM exposes pre defined callbacks to send data to MLflow, Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack

```python
from litellm import completion

## set env variables for logging tools
## set env variables for logging tools (API key setup is not required when using MLflow)
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key" # get your public key at https://app.lunary.ai/settings
os.environ["HELICONE_API_KEY"] = "your-helicone-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
@@ -94,7 +94,7 @@ os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["OPENAI_API_KEY"]

# set callbacks
litellm.success_callback = ["lunary", "langfuse", "helicone"] # log input/output to langfuse, lunary, supabase, helicone
litellm.success_callback = ["lunary", "mlflow", "langfuse", "helicone"] # log input/output to MLflow, langfuse, lunary, helicone

#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
@@ -393,12 +393,12 @@ except OpenAIError as e:
```

### Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack
LiteLLM exposes pre defined callbacks to send data to Lunary, MLflow, Langfuse, Helicone, Promptlayer, Traceloop, Slack

```python
from litellm import completion

## set env variables for logging tools
## set env variables for logging tools (API key setup is not required when using MLflow)
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key" # get your public key at https://app.lunary.ai/settings
os.environ["HELICONE_API_KEY"] = "your-helicone-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
@@ -407,7 +407,7 @@ os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["OPENAI_API_KEY"]

# set callbacks
litellm.success_callback = ["lunary", "langfuse", "helicone"] # log input/output to lunary, langfuse, supabase, helicone
litellm.success_callback = ["lunary", "mlflow", "langfuse", "helicone"] # log input/output to lunary, mlflow, langfuse, helicone

#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
@@ -111,6 +111,30 @@ chat.invoke(messages)
</TabItem>
</Tabs>

## Use Langchain ChatLiteLLM with MLflow

MLflow provides an open-source observability solution for ChatLiteLLM.

To enable the integration, simply call `mlflow.litellm.autolog()` before invoking `ChatLiteLLM` in your code. No other setup is necessary.

```python
import mlflow

mlflow.litellm.autolog()
```

Once auto-tracing is enabled, you can invoke `ChatLiteLLM` and see recorded traces in MLflow.

```python
import os
from langchain.chat_models import ChatLiteLLM

os.environ['OPENAI_API_KEY']="sk-..."

chat = ChatLiteLLM(model="gpt-4o-mini")
chat.invoke("Hi!")
```

## Use Langchain ChatLiteLLM with Lunary
```python
import os
@@ -1,4 +1,6 @@
# MLflow
import Image from '@theme/IdealImage';

# 🔁 MLflow - OSS LLM Observability and Evaluation

## What is MLflow?
@@ -18,7 +20,7 @@ Install MLflow:
pip install mlflow
```

To enable LiteLLM tracing:
To enable MLflow auto tracing for LiteLLM:

```python
import mlflow
@@ -29,9 +31,9 @@ mlflow.litellm.autolog()
# litellm.callbacks = ["mlflow"]
```

Since MLflow is open-source, no sign-up or API key is needed to log traces!
Since MLflow is open-source and free, **no sign-up or API key is needed to log traces!**

```
```python
import litellm
import os
@@ -53,6 +55,63 @@ Open the MLflow UI and go to the `Traces` tab to view logged traces:
mlflow ui
```
## Tracing Tool Calls

The MLflow integration with LiteLLM supports tracking tool calls in addition to messages.

```python
import litellm
import mlflow

# Enable MLflow auto-tracing for LiteLLM
mlflow.litellm.autolog()

# Define the tool function.
def get_weather(location: str) -> str:
    if location == "Tokyo":
        return "sunny"
    elif location == "Paris":
        return "rainy"
    return "unknown"

# Define function spec
get_weather_tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather in a given location",
        "parameters": {
            "properties": {
                "location": {
                    "description": "The city and state, e.g., San Francisco, CA",
                    "type": "string",
                },
            },
            "required": ["location"],
            "type": "object",
        },
    },
}

# Call LiteLLM as usual
response = litellm.completion(
    model="gpt-4o-mini",
    messages=[
        {"role": "user", "content": "What's the weather like in Paris today?"}
    ],
    tools=[get_weather_tool]
)
```

<Image img={require('../../img/mlflow_tool_calling_tracing.png')} />
## Evaluation

The MLflow LiteLLM integration allows you to run qualitative assessments against LLMs to evaluate and/or monitor your GenAI application.

Visit the [Evaluate LLMs Tutorial](../tutorials/eval_suites.md) for complete guidance on how to run an evaluation suite with LiteLLM and MLflow.
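Below is a minimal, illustrative sketch of what such an assessment can look like with `mlflow.evaluate()`; the model name, sample questions, and `model_type` are placeholders, and the tutorial linked above remains the reference for the full workflow.

```python
import litellm
import mlflow
import pandas as pd

# Placeholder evaluation data (assumed questions).
eval_data = pd.DataFrame({"inputs": ["What is MLflow?", "What is LiteLLM?"]})

# A callable that maps a DataFrame of inputs to completions via LiteLLM.
def answer(df: pd.DataFrame):
    return [
        litellm.completion(
            model="gpt-4o-mini",  # placeholder model
            messages=[{"role": "user", "content": question}],
        ).choices[0].message.content
        for question in df["inputs"]
    ]

with mlflow.start_run():
    results = mlflow.evaluate(answer, eval_data, model_type="text")
    print(results.metrics)
```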
## Exporting Traces to OpenTelemetry collectors

MLflow traces are compatible with OpenTelemetry. You can export traces to any OpenTelemetry collector (e.g., Jaeger, Zipkin, Datadog, New Relic) by setting the endpoint URL in the environment variables.
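For example, a minimal sketch using the standard OpenTelemetry endpoint variable (assuming an OTLP-compatible collector such as Jaeger is already listening on `localhost:4317`; the exact URL and any extra exporter packages depend on your setup):

```python
import os

# Standard OpenTelemetry endpoint variable; adjust the URL to your collector.
os.environ["OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"] = "http://localhost:4317/v1/traces"

import mlflow

mlflow.litellm.autolog()  # subsequent LiteLLM traces are exported to the configured collector
```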
@@ -75,7 +134,7 @@ import litellm
import mlflow
from mlflow.entities import SpanType

# Enable LiteLLM tracing
# Enable MLflow auto-tracing for LiteLLM
mlflow.litellm.autolog()
@@ -30,7 +30,7 @@ import TabItem from '@theme/TabItem';
6. [**litellm.completion() / litellm.embedding()**:](../index#litellm-python-sdk) The litellm Python SDK is used to call the LLM in the OpenAI API format (Translation and parameter mapping)

7. **Post-Request Processing**: After the response is sent back to the client, the following **asynchronous** tasks are performed:
   - [Logging to Lunary, LangFuse or other logging destinations](./logging)
   - [Logging to Lunary, MLflow, LangFuse or other logging destinations](./logging)
   - The [MaxParallelRequestsHandler](https://github.com/BerriAI/litellm/blob/main/litellm/proxy/hooks/parallel_request_limiter.py) updates the rpm/tpm usage for the
     - Global Server Rate Limit
     - Virtual Key Rate Limit
@@ -3,6 +3,7 @@
Log Proxy input, output, and exceptions using:

- Lunary
- MLflow
- Langfuse
- OpenTelemetry
- GCS, s3, Azure (Blob) Buckets
@@ -232,6 +233,58 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
}'
```

## MLflow

### Step 1: Install dependencies

Install the dependencies.

```shell
pip install litellm mlflow
```

### Step 2: Create a `config.yaml` with `mlflow` callback

```yaml
model_list:
  - model_name: "*"
    litellm_params:
      model: "*"

litellm_settings:
  success_callback: ["mlflow"]
  failure_callback: ["mlflow"]
```

### Step 3: Start the LiteLLM proxy

```shell
litellm --config config.yaml
```

### Step 4: Make a request

```shell
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
    -H 'Content-Type: application/json' \
    -d '{
    "model": "gpt-4o-mini",
    "messages": [
        {
            "role": "user",
            "content": "What is the capital of France?"
        }
    ]
}'
```

### Step 5: Review traces

Run the following command to start the MLflow UI and review the recorded traces.

```shell
mlflow ui
```

## Langfuse

We will use the `--config` to set `litellm.success_callback = ["langfuse"]`; this will log all successful LLM calls to Langfuse. Make sure to set `LANGFUSE_PUBLIC_KEY` and `LANGFUSE_SECRET_KEY` in your environment.
@@ -2,9 +2,9 @@ import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Evaluate LLMs - ML Flow Evals, Auto Eval
# Evaluate LLMs - MLflow Evals, Auto Eval

## Using LiteLLM with ML Flow
## Using LiteLLM with MLflow
MLflow provides an API `mlflow.evaluate()` to help evaluate your LLMs https://mlflow.org/docs/latest/llms/llm-evaluate/index.html

### Pre Requisites
@@ -153,7 +153,7 @@ $ litellm --model command-nightly
</Tabs>

### Step 2: Run ML Flow
### Step 2: Run MLflow
Before running the eval we will set `openai.api_base` to the litellm proxy from Step 1

```python
@@ -209,7 +209,7 @@ with mlflow.start_run() as run:
```

### ML Flow Output
### MLflow Output
```
{'toxicity/v1/mean': 0.00014476531214313582, 'toxicity/v1/variance': 2.5759661361262862e-12, 'toxicity/v1/p90': 0.00014604929747292773, 'toxicity/v1/ratio': 0.0, 'exact_match/v1': 0.0}
Downloading artifacts: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1890.18it/s]
BIN docs/my-website/img/mlflow_tool_calling_tracing.png (new file, 85 KiB; binary file not shown)
@@ -331,12 +331,12 @@ except OpenAIError as e:
```

### Logging Observability - Log LLM Input/Output ([Docs](https://docs.litellm.ai/docs/observability/callbacks))
LiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack
LiteLLM exposes pre defined callbacks to send data to MLflow, Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slack

```python
from litellm import completion

## set env variables for logging tools
## set env variables for logging tools (API key setup is not required when using MLflow)
os.environ["LUNARY_PUBLIC_KEY"] = "your-lunary-public-key" # get your key at https://app.lunary.ai/settings
os.environ["HELICONE_API_KEY"] = "your-helicone-key"
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
@@ -345,7 +345,7 @@ os.environ["LANGFUSE_SECRET_KEY"] = ""
os.environ["OPENAI_API_KEY"]

# set callbacks
litellm.success_callback = ["lunary", "langfuse", "helicone"] # log input/output to lunary, langfuse, supabase, helicone
litellm.success_callback = ["lunary", "mlflow", "langfuse", "helicone"] # log input/output to lunary, mlflow, langfuse, helicone

#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
@@ -36,6 +36,7 @@ class MlflowLogger(CustomLogger):
        else:
            span = self._start_span_or_trace(kwargs, start_time)
            end_time_ns = int(end_time.timestamp() * 1e9)
            self._extract_and_set_chat_attributes(span, kwargs, response_obj)
            self._end_span_or_trace(
                span=span,
                outputs=response_obj,
@@ -45,6 +46,21 @@ class MlflowLogger(CustomLogger):
        except Exception:
            verbose_logger.debug("MLflow Logging Error", stack_info=True)

    def _extract_and_set_chat_attributes(self, span, kwargs, response_obj):
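        """Attach chat messages and tool definitions from the request/response to the MLflow span, when MLflow's chat tracing helpers are available."""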
        try:
            from mlflow.tracing.utils import set_span_chat_messages, set_span_chat_tools
        except ImportError:
            return

        inputs = self._construct_input(kwargs)
        input_messages = inputs.get("messages", [])
        output_messages = [c.message.model_dump(exclude_none=True)
                           for c in getattr(response_obj, "choices", [])]
        if messages := [*input_messages, *output_messages]:
            set_span_chat_messages(span, messages)
        if tools := inputs.get("tools"):
            set_span_chat_tools(span, tools)

    def log_failure_event(self, kwargs, response_obj, start_time, end_time):
        self._handle_failure(kwargs, response_obj, start_time, end_time)
@@ -67,6 +83,7 @@ class MlflowLogger(CustomLogger):
            if exception := kwargs.get("exception"):
                span.add_event(SpanEvent.from_exception(exception)) # type: ignore

            self._extract_and_set_chat_attributes(span, kwargs, response_obj)
            self._end_span_or_trace(
                span=span,
                outputs=response_obj,
@@ -107,6 +124,8 @@ class MlflowLogger(CustomLogger):
        # has complete_streaming_response that gathers the full response.
        if final_response := kwargs.get("complete_streaming_response"):
            end_time_ns = int(end_time.timestamp() * 1e9)

            self._extract_and_set_chat_attributes(span, kwargs, final_response)
            self._end_span_or_trace(
                span=span,
                outputs=final_response,
@@ -135,6 +154,9 @@ class MlflowLogger(CustomLogger):
    def _construct_input(self, kwargs):
        """Construct span inputs with optional parameters"""
        inputs = {"messages": kwargs.get("messages")}
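        # Record tool definitions (when provided) so they appear in the span inputs.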
        if tools := kwargs.get("tools"):
            inputs["tools"] = tools

        for key in ["functions", "tools", "stream", "tool_choice", "user"]:
            if value := kwargs.get("optional_params", {}).pop(key, None):
                inputs[key] = value