Mirror of https://github.com/meta-llama/llama-stack.git
Synced 2025-12-06 18:40:57 +00:00
test(telemetry): Telemetry Tests (#3805)
Some checks failed
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 0s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 0s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Python Package Build Test / build (3.12) (push) Failing after 10s
Python Package Build Test / build (3.13) (push) Failing after 10s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 14s
Unit Tests / unit-tests (3.13) (push) Failing after 11s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 20s
Unit Tests / unit-tests (3.12) (push) Failing after 16s
Test External API and Providers / test-external (venv) (push) Failing after 28s
Vector IO Integration Tests / test-matrix (push) Failing after 30s
API Conformance Tests / check-schema-compatibility (push) Successful in 38s
UI Tests / ui-tests (22) (push) Successful in 1m32s
Pre-commit / pre-commit (push) Successful in 3m16s
# What does this PR do?
Adds a test and a standardized way to build out future telemetry tests in Llama Stack. Contributes to https://github.com/llamastack/llama-stack/issues/3806

## Test Plan
This is the test plan 😎
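The standardized pattern this PR introduces swaps the OTLP exporters for the OpenTelemetry SDK's in-memory implementations, so tests can assert on finished spans directly. A minimal sketch of that collection pattern, using only the OpenTelemetry SDK (the `traced_operation` span is a stand-in for a real Llama Stack API call):

```python
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

exporter = InMemorySpanExporter()
provider = TracerProvider()
# SimpleSpanProcessor exports synchronously, so no flush is needed before asserting.
provider.add_span_processor(SimpleSpanProcessor(exporter))
trace.set_tracer_provider(provider)

tracer = trace.get_tracer(__name__)
with tracer.start_as_current_span("traced_operation"):
    pass  # application code under test would run here

spans = exporter.get_finished_spans()
assert len(spans) == 1 and spans[0].name == "traced_operation"
```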
This commit is contained in:
parent
224c99560c
commit
943558af36
11 changed files with 9452 additions and 3 deletions
@@ -79,7 +79,6 @@ class TelemetryAdapter(Telemetry):
         metrics.set_meter_provider(metric_provider)
         self.meter = metrics.get_meter(__name__)
         self._lock = _global_lock

    async def initialize(self) -> None:
@@ -70,7 +70,7 @@ def trace_protocol[T](cls: type[T]) -> type[T]:
             "__class__": class_name,
             "__method__": method_name,
             "__type__": span_type,
-            "__args__": str(combined_args),
+            "__args__": json.dumps(combined_args),
         }

         return class_name, method_name, span_attributes
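The functional change here swaps `str()` for `json.dumps()` when recording `__args__`, which makes the attribute machine-parseable; the new `test_telemetry_format_completeness` below relies on this via `json.loads(attrs["__args__"])`. A small standalone illustration of why the Python repr form is not enough:

```python
import json

# Illustrative arguments, similar in shape to what the decorator captures.
combined_args = {"model_id": "llama3.2:3b-instruct-fp16", "stream": True}

str_form = str(combined_args)         # "{'model_id': ..., 'stream': True}" -- Python repr, not JSON
json_form = json.dumps(combined_args)

try:
    json.loads(str_form)              # single quotes and bare True are invalid JSON
except json.JSONDecodeError:
    print("str() output cannot be parsed back")

args = json.loads(json_form)          # round-trips cleanly
assert args["model_id"] == "llama3.2:3b-instruct-fp16"
```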
@@ -82,8 +82,8 @@ def trace_protocol[T](cls: type[T]) -> type[T]:
             class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)

             with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
+                count = 0
                 try:
-                    count = 0
                     async for item in method(self, *args, **kwargs):
                         yield item
                         count += 1
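Hoisting `count = 0` above the `try` binds the counter for the whole span body rather than only inside the `try`, so whatever records it on the span afterwards (the recording step is not visible in this hunk, but `test_streaming_chunk_count` below asserts a `chunk_count` span attribute) always sees a defined value. A hedged sketch of the wrapper shape, with illustrative names:

```python
import asyncio

async def numbers():
    yield 1
    yield 2

async def traced(gen):
    # Counter is bound before entering try, so the cleanup below can always read it.
    count = 0
    try:
        async for item in gen:
            yield item
            count += 1
    finally:
        print(f"chunk_count={count}")  # e.g. recorded as a span attribute

async def main():
    assert [x async for x in traced(numbers())] == [1, 2]

asyncio.run(main())
```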
@@ -0,0 +1,506 @@
{
  "test_id": null,
  "request": {
    "method": "POST",
    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "messages": [
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": "What is 2 + 2?"},
        {"role": "assistant", "content": "The answer to the equation 2 + 2 is 4."},
        {"role": "user", "content": "Tell me a short joke"}
      ],
      "max_tokens": 0,
      "stream": true
    },
    "endpoint": "/v1/chat/completions",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": [
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-ab1a32474062", "choices": [{"delta": {"content": "Why", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-ab1a32474062", "choices": [{"delta": {"content": " did", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-ab1a32474062", "choices": [{"delta": {"content": " the", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-ab1a32474062", "choices": [{"delta": {"content": " scare", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-ab1a32474062", "choices": [{"delta": {"content": "crow", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-ab1a32474062", "choices": [{"delta": {"content": " win", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-ab1a32474062", "choices": [{"delta": {"content": " an", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-ab1a32474062", "choices": [{"delta": {"content": " award", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-ab1a32474062", "choices": [{"delta": {"content": "?\n\n", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-ab1a32474062", "choices": [{"delta": {"content": "Because", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-ab1a32474062", "choices": [{"delta": {"content": " he", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-ab1a32474062", "choices": [{"delta": {"content": " was", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-ab1a32474062", "choices": [{"delta": {"content": " outstanding", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-ab1a32474062", "choices": [{"delta": {"content": " in", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-ab1a32474062", "choices": [{"delta": {"content": " his", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-ab1a32474062", "choices": [{"delta": {"content": " field", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-ab1a32474062", "choices": [{"delta": {"content": "!", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-ab1a32474062", "choices": [{"delta": {"content": "", "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": "stop", "index": 0, "logprobs": null}], "created": 0, "model": "llama3.2:3b-instruct-fp16", "object": "chat.completion.chunk", "service_tier": null, "system_fingerprint": "fp_ollama", "usage": null}}
    ],
    "is_streaming": true
  }
}
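The recording above is plain JSON, so the replay machinery (and ad-hoc debugging) can reassemble the streamed text directly from `response.body`. A hedged sketch, with an illustrative file name:

```python
import json

# Read one of these recording files and stitch the streamed deltas back together.
with open("recording.json") as f:
    rec = json.load(f)

text = "".join(
    chunk["__data__"]["choices"][0]["delta"]["content"] or ""
    for chunk in rec["response"]["body"]
)
print(text)  # "Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"
```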
@@ -0,0 +1,88 @@
{
  "test_id": null,
  "request": {
    "method": "POST",
    "url": "http://0.0.0.0:11434/v1/v1/models",
    "headers": {},
    "body": {},
    "endpoint": "/v1/models",
    "model": ""
  },
  "response": {
    "body": [
      {"__type__": "openai.types.model.Model", "__data__": {"id": "llama3.2:3b-instruct-fp16", "created": 1760453641, "object": "model", "owned_by": "library"}},
      {"__type__": "openai.types.model.Model", "__data__": {"id": "qwen3:4b", "created": 1757615302, "object": "model", "owned_by": "library"}},
      {"__type__": "openai.types.model.Model", "__data__": {"id": "gpt-oss:latest", "created": 1756395223, "object": "model", "owned_by": "library"}},
      {"__type__": "openai.types.model.Model", "__data__": {"id": "nomic-embed-text:latest", "created": 1756318548, "object": "model", "owned_by": "library"}},
      {"__type__": "openai.types.model.Model", "__data__": {"id": "llama3.2:3b", "created": 1755191039, "object": "model", "owned_by": "library"}},
      {"__type__": "openai.types.model.Model", "__data__": {"id": "all-minilm:l6-v2", "created": 1753968177, "object": "model", "owned_by": "library"}},
      {"__type__": "openai.types.model.Model", "__data__": {"id": "llama3.2:1b", "created": 1746124735, "object": "model", "owned_by": "library"}},
      {"__type__": "openai.types.model.Model", "__data__": {"id": "llama3.2:latest", "created": 1746044170, "object": "model", "owned_by": "library"}}
    ],
    "is_streaming": false
  }
}
tests/integration/telemetry/conftest.py (95 lines, new file)
@@ -0,0 +1,95 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Telemetry test configuration using OpenTelemetry SDK exporters.

This conftest provides in-memory telemetry collection for library_client mode only.
Tests using these fixtures should skip in server mode since the in-memory collector
cannot access spans from a separate server process.
"""

from typing import Any

import opentelemetry.metrics as otel_metrics
import opentelemetry.trace as otel_trace
import pytest
from opentelemetry import metrics, trace
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import InMemoryMetricReader
from opentelemetry.sdk.trace import ReadableSpan, TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

import llama_stack.providers.inline.telemetry.meta_reference.telemetry as telemetry_module
from llama_stack.testing.api_recorder import patch_httpx_for_test_id
from tests.integration.fixtures.common import instantiate_llama_stack_client


class TestCollector:
    def __init__(self, span_exp, metric_read):
        assert span_exp and metric_read
        self.span_exporter = span_exp
        self.metric_reader = metric_read

    def get_spans(self) -> tuple[ReadableSpan, ...]:
        return self.span_exporter.get_finished_spans()

    def get_metrics(self) -> Any | None:
        metrics = self.metric_reader.get_metrics_data()
        if metrics and metrics.resource_metrics:
            return metrics.resource_metrics[0].scope_metrics[0].metrics
        return None

    def clear(self) -> None:
        self.span_exporter.clear()
        self.metric_reader.get_metrics_data()


@pytest.fixture(scope="session")
def _telemetry_providers():
    """Set up in-memory OTEL providers before llama_stack_client initializes."""
    # Reset set-once flags to allow re-initialization
    if hasattr(otel_trace, "_TRACER_PROVIDER_SET_ONCE"):
        otel_trace._TRACER_PROVIDER_SET_ONCE._done = False  # type: ignore
    if hasattr(otel_metrics, "_METER_PROVIDER_SET_ONCE"):
        otel_metrics._METER_PROVIDER_SET_ONCE._done = False  # type: ignore

    # Create in-memory exporters/readers
    span_exporter = InMemorySpanExporter()
    tracer_provider = TracerProvider()
    tracer_provider.add_span_processor(SimpleSpanProcessor(span_exporter))
    trace.set_tracer_provider(tracer_provider)

    metric_reader = InMemoryMetricReader()
    meter_provider = MeterProvider(metric_readers=[metric_reader])
    metrics.set_meter_provider(meter_provider)

    # Set module-level provider so TelemetryAdapter uses our in-memory providers
    telemetry_module._TRACER_PROVIDER = tracer_provider

    yield (span_exporter, metric_reader, tracer_provider, meter_provider)

    telemetry_module._TRACER_PROVIDER = None
    tracer_provider.shutdown()
    meter_provider.shutdown()


@pytest.fixture(scope="session")
def llama_stack_client(_telemetry_providers, request):
    """Override llama_stack_client to ensure in-memory telemetry providers are used."""
    patch_httpx_for_test_id()
    client = instantiate_llama_stack_client(request.session)
    return client


@pytest.fixture
def mock_otlp_collector(_telemetry_providers):
    """Provides access to telemetry data and clears between tests."""
    span_exporter, metric_reader, _, _ = _telemetry_providers
    collector = TestCollector(span_exporter, metric_reader)
    yield collector
    collector.clear()
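With these fixtures in place, a test only requests `mock_otlp_collector` next to `llama_stack_client`; the shared session-scoped `_telemetry_providers` dependency guarantees the in-memory providers are installed before the client initializes. A hedged usage sketch (model name illustrative):

```python
def test_spans_are_captured(mock_otlp_collector, llama_stack_client):
    # Any traced API call should leave at least one finished span behind.
    llama_stack_client.chat.completions.create(
        model="llama3.2:3b-instruct-fp16",  # illustrative model id
        messages=[{"role": "user", "content": "ping"}],
    )
    assert len(mock_otlp_collector.get_spans()) > 0
```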
@@ -0,0 +1,57 @@
{
  "test_id": "tests/integration/telemetry/test_openai_telemetry.py::test_openai_completion_creates_telemetry[txt=ollama/llama3.2:3b-instruct-fp16]",
  "request": {
    "method": "POST",
    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "messages": [
        {"role": "user", "content": "Test OpenAI telemetry creation"}
      ],
      "stream": false
    },
    "endpoint": "/v1/chat/completions",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "rec-0de60cd6a6ec",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "I'm happy to help you with setting up and testing OpenAI's telemetry creation.\n\nOpenAI provides a feature called \"Telemetry\" which allows developers to collect data about their users' interactions with the model. To test this feature, we need to create a simple application that uses the OpenAI API and sends telemetry data to their servers.\n\nHere's an example code in Python that demonstrates how to create a simple telemetry creator:\n\n```python\nimport os\nfrom openai.api import API\n\n# Initialize the OpenAI API client\napi = API(os.environ['OPENAI_API_KEY'])\n\ndef create_user():\n    # Create a new user entity\n    user_entity = {\n        'id': 'user-123',\n        'name': 'John Doe',\n        'email': 'john.doe@example.com'\n    }\n    \n    # Send the user creation request to OpenAI\n    response = api.users.create(user_entity)\n    print(f\"User created: {response}\")\n\ndef create_transaction():\n    # Create a new transaction entity\n    transaction_entity = {\n        'id': 'tran-123',\n        'user_id': 'user-123',\n        'transaction_type': 'query'\n    }\n    \n    # Send the transaction creation request to OpenAI\n    response = api.transactions.create(transaction_entity)\n    print(f\"Transaction created: {response}\")\n\ndef send_telemetry_data():\n    # Create a new telemetry event entity\n    telemetry_event_entity = {\n        'id': 'telem-123',\n        'transaction_id': 'tran-123',\n        'data': '{ \"event\": \"test\", \"user_id\": 1 }'\n    }\n    \n    # Send the telemetry data to OpenAI\n    response = api.telemetry.create(telemetry_event_entity)\n    print(f\"Telemetry event sent: {response}\")\n\n# Test the telemetry creation\ncreate_user()\ncreate_transaction()\nsend_telemetry_data()\n```\n\nMake sure you replace `OPENAI_API_KEY` with your actual API key. Also, ensure that you have the OpenAI API client library installed by running `pip install openai`.\n\nOnce you've created the test code, run it and observe the behavior of the telemetry creation process.\n\nPlease let me know if you need further modifications or assistance!",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": null
            }
          }
        ],
        "created": 0,
        "model": "llama3.2:3b-instruct-fp16",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": "fp_ollama",
        "usage": {
          "completion_tokens": 460,
          "prompt_tokens": 30,
          "total_tokens": 490,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        }
      }
    },
    "is_streaming": false
  }
}
@@ -0,0 +1,59 @@
{
  "test_id": "tests/integration/telemetry/test_completions.py::test_telemetry_format_completeness[txt=ollama/llama3.2:3b-instruct-fp16]",
  "request": {
    "method": "POST",
    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "messages": [
        {"role": "user", "content": "Test trace openai with temperature 0.7"}
      ],
      "max_tokens": 100,
      "stream": false,
      "temperature": 0.7
    },
    "endpoint": "/v1/chat/completions",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "rec-1fcfd86d8111",
        "choices": [
          {
            "finish_reason": "length",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "import torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Load the pre-trained model and tokenizer\nmodel_name = \"CompVis/transformers-base-uncased\"\nmodel = AutoModelForCausalLM.from_pretrained(model_name)\ntokenizer = AutoTokenizer.from_pretrained(model_name)\n\n# Set the temperature to 0.7\ntemperature = 0.7\n\n# Define a function to generate text\ndef generate_text(prompt, max_length=100):\n    input",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": null
            }
          }
        ],
        "created": 0,
        "model": "llama3.2:3b-instruct-fp16",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": "fp_ollama",
        "usage": {
          "completion_tokens": 100,
          "prompt_tokens": 35,
          "total_tokens": 135,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        }
      }
    },
    "is_streaming": false
  }
}
tests/integration/telemetry/recordings/d45c9a9229e7e3f50a6eac139508babe21988649eb321b562f74061f58593c25.json (4211 lines, generated, new file)
File diff suppressed because it is too large
tests/integration/telemetry/recordings/db8ffad4840512348c215005128557807ffbed0cf6bf11a52c1d1009878886ef.json (4263 lines, generated, new file)
File diff suppressed because it is too large
@@ -0,0 +1,59 @@
{
  "test_id": "tests/integration/telemetry/test_completions.py::test_telemetry_format_completeness[txt=llama3.2:3b-instruct-fp16]",
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "messages": [
        {"role": "user", "content": "Test trace openai with temperature 0.7"}
      ],
      "max_tokens": 100,
      "stream": false,
      "temperature": 0.7
    },
    "endpoint": "/v1/chat/completions",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "rec-dba5042d6691",
        "choices": [
          {
            "finish_reason": "length",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "To test the \"trace\" functionality of OpenAI's GPT-4 model at a temperature of 0.7, you can follow these steps:\n\n1. First, make sure you have an account with OpenAI and have been granted access to their API.\n\n2. You will need to install the `transformers` library, which is the official library for working with Transformers models like GPT-4:\n\n   ```bash\npip install transformers\n```\n\n3. Next, import the necessary",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": null
            }
          }
        ],
        "created": 0,
        "model": "llama3.2:3b-instruct-fp16",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": "fp_ollama",
        "usage": {
          "completion_tokens": 100,
          "prompt_tokens": 35,
          "total_tokens": 135,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        }
      }
    },
    "is_streaming": false
  }
}
tests/integration/telemetry/test_completions.py (112 lines, new file)
@@ -0,0 +1,112 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Telemetry tests verifying @trace_protocol decorator format using in-memory exporter."""

import json
import os

import pytest

pytestmark = pytest.mark.skipif(
    os.environ.get("LLAMA_STACK_TEST_STACK_CONFIG_TYPE") == "server",
    reason="In-memory telemetry tests only work in library_client mode (server mode runs in separate process)",
)


def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_model_id):
    """Verify streaming adds chunk_count and __type__=async_generator."""

    stream = llama_stack_client.chat.completions.create(
        model=text_model_id,
        messages=[{"role": "user", "content": "Test trace openai 1"}],
        stream=True,
    )

    chunks = list(stream)
    assert len(chunks) > 0

    spans = mock_otlp_collector.get_spans()
    assert len(spans) > 0

    chunk_count = None
    for span in spans:
        if span.attributes.get("__type__") == "async_generator":
            chunk_count = span.attributes.get("chunk_count")
            if chunk_count:
                chunk_count = int(chunk_count)
            break

    assert chunk_count is not None
    assert chunk_count == len(chunks)


def test_telemetry_format_completeness(mock_otlp_collector, llama_stack_client, text_model_id):
    """Comprehensive validation of telemetry data format including spans and metrics."""
    response = llama_stack_client.chat.completions.create(
        model=text_model_id,
        messages=[{"role": "user", "content": "Test trace openai with temperature 0.7"}],
        temperature=0.7,
        max_tokens=100,
        stream=False,
    )

    # Handle both dict and Pydantic model for usage.
    # This occurs because the replay system returns a dict for usage, while the live client returns a Pydantic model.
    # TODO: Fix this by making the replay system return a Pydantic model for usage
    usage = response.usage if isinstance(response.usage, dict) else response.usage.model_dump()
    assert usage.get("prompt_tokens") and usage["prompt_tokens"] > 0
    assert usage.get("completion_tokens") and usage["completion_tokens"] > 0
    assert usage.get("total_tokens") and usage["total_tokens"] > 0

    # Verify spans
    spans = mock_otlp_collector.get_spans()
    assert len(spans) == 5

    # we only need this captured one time
    logged_model_id = None

    for span in spans:
        attrs = span.attributes
        assert attrs is not None

        # Root span is created manually by tracing middleware, not by @trace_protocol decorator
        is_root_span = attrs.get("__root__") is True

        if is_root_span:
            # Root spans have different attributes
            assert attrs.get("__location__") in ["library_client", "server"]
        else:
            # Non-root spans are created by @trace_protocol decorator
            assert attrs.get("__autotraced__")
            assert attrs.get("__class__") and attrs.get("__method__")
            assert attrs.get("__type__") in ["async", "sync", "async_generator"]

            args = json.loads(attrs["__args__"])
            if "model_id" in args:
                logged_model_id = args["model_id"]

    assert logged_model_id is not None
    assert logged_model_id == text_model_id

    # TODO: re-enable this once metrics get fixed
    """
    # Verify token usage metrics in response
    metrics = mock_otlp_collector.get_metrics()

    assert metrics
    for metric in metrics:
        assert metric.name in ["completion_tokens", "total_tokens", "prompt_tokens"]
        assert metric.unit == "tokens"
        assert metric.data.data_points and len(metric.data.data_points) == 1
        match metric.name:
            case "completion_tokens":
                assert metric.data.data_points[0].value == usage["completion_tokens"]
            case "total_tokens":
                assert metric.data.data_points[0].value == usage["total_tokens"]
            case "prompt_tokens":
                assert metric.data.data_points[0].value == usage["prompt_tokens"]
    """
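For when the disabled metrics block is re-enabled: the data path it walks through `InMemoryMetricReader` looks like this self-contained sketch (counter name and value are illustrative, mirroring the token counters the block expects):

```python
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import InMemoryMetricReader

reader = InMemoryMetricReader()
provider = MeterProvider(metric_readers=[reader])

# Record a token count the way the telemetry adapter's meter would.
counter = provider.get_meter("test").create_counter("prompt_tokens", unit="tokens")
counter.add(35)

# Drill down the same way TestCollector.get_metrics() does.
data = reader.get_metrics_data()
metric = data.resource_metrics[0].scope_metrics[0].metrics[0]
assert metric.name == "prompt_tokens"
assert metric.unit == "tokens"
assert metric.data.data_points[0].value == 35
```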