rework(telemetry): remove legacy telemetry api

Emilio Garcia 2025-10-07 17:25:54 -04:00
parent 0c843ec87f
commit 8e29d0eb79
13 changed files with 397 additions and 1645 deletions


@@ -1,56 +0,0 @@
{
"request": {
"method": "POST",
"url": "http://localhost:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Test OpenAI telemetry creation"
}
],
"stream": false
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-67198cbad48f",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "import openai\n\n# You can replace this with your own API key\nAPI_KEY = \"your_openai_api_key\"\n\n# Create an OpenAI instance\nopenai_client = openai.Client(api_key=API_KEY)\n\n# Test the telemetry endpoint by creating a new telemetry instance\ntelemetry = openai_client.create_telemetry()\n\nprint(telemetry)",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 72,
"prompt_tokens": 30,
"total_tokens": 102,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}


@@ -1,194 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import time
from datetime import UTC, datetime

import pytest

@pytest.fixture(scope="module", autouse=True)
def setup_openai_telemetry_data(llama_stack_client, text_model_id):
"""Setup fixture that creates telemetry data specifically for OpenAI completions testing."""
# Create OpenAI completion traces
for i in range(3):
llama_stack_client.chat.completions.create(
model=text_model_id,
messages=[
{
"role": "user",
"content": f"Test trace openai {i}",
}
],
            # stream=False so that metrics are always captured.
stream=False,
)
# Create additional OpenAI completion traces with different parameters
for i in range(2):
llama_stack_client.chat.completions.create(
model=text_model_id,
messages=[
{
"role": "user",
"content": f"Test trace openai with temperature {i}",
}
],
temperature=0.7,
max_tokens=100,
stream=False,
)
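    # Poll for up to 30 seconds until the expected traces are visible via the telemetry API.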
start_time = time.time()
while time.time() - start_time < 30:
traces = llama_stack_client.telemetry.query_traces(limit=10)
if len(traces) >= 5: # 5 OpenAI completion traces
break
time.sleep(0.1)
if len(traces) < 5:
pytest.fail(
f"Failed to create sufficient OpenAI completion telemetry data after 30s. Got {len(traces)} traces."
)
    yield

def test_openai_traces_basic(llama_stack_client):
"""Test basic trace querying functionality for OpenAI completions."""
all_traces = llama_stack_client.telemetry.query_traces(limit=10)
assert isinstance(all_traces, list), "Should return a list of traces"
assert len(all_traces) >= 5, "Should have at least 5 traces from OpenAI setup"
# Verify trace structure and data quality
first_trace = all_traces[0]
assert hasattr(first_trace, "trace_id"), "Trace should have trace_id"
assert hasattr(first_trace, "start_time"), "Trace should have start_time"
assert hasattr(first_trace, "root_span_id"), "Trace should have root_span_id"
# Validate trace_id is a valid UUID format
assert isinstance(first_trace.trace_id, str) and len(first_trace.trace_id) > 0, (
"trace_id should be non-empty string"
)
# Validate start_time format and not in the future
now = datetime.now(UTC)
if isinstance(first_trace.start_time, str):
trace_time = datetime.fromisoformat(first_trace.start_time.replace("Z", "+00:00"))
else:
# start_time is already a datetime object
trace_time = first_trace.start_time
if trace_time.tzinfo is None:
trace_time = trace_time.replace(tzinfo=UTC)
# Ensure trace time is not in the future
time_diff = (now - trace_time).total_seconds()
assert time_diff >= 0, f"Trace start_time should not be in the future, got {time_diff}s"
# Validate root_span_id exists and is non-empty
assert isinstance(first_trace.root_span_id, str) and len(first_trace.root_span_id) > 0, (
"root_span_id should be non-empty string"
)
# Test querying specific trace by ID
specific_trace = llama_stack_client.telemetry.get_trace(trace_id=first_trace.trace_id)
assert specific_trace.trace_id == first_trace.trace_id, "Retrieved trace should match requested ID"
assert specific_trace.start_time == first_trace.start_time, "Retrieved trace should have same start_time"
assert specific_trace.root_span_id == first_trace.root_span_id, "Retrieved trace should have same root_span_id"
# Test pagination with proper validation
recent_traces = llama_stack_client.telemetry.query_traces(limit=3, offset=0)
assert len(recent_traces) <= 3, "Should return at most 3 traces when limit=3"
assert len(recent_traces) >= 1, "Should return at least 1 trace"
# Verify all traces have required fields
for trace in recent_traces:
assert hasattr(trace, "trace_id") and trace.trace_id, "Each trace should have non-empty trace_id"
assert hasattr(trace, "start_time") and trace.start_time, "Each trace should have non-empty start_time"
        assert hasattr(trace, "root_span_id") and trace.root_span_id, "Each trace should have non-empty root_span_id"

def test_openai_spans_basic(llama_stack_client):
"""Test basic span querying functionality for OpenAI completions."""
spans = llama_stack_client.telemetry.query_spans(attribute_filters=[], attributes_to_return=[])
assert isinstance(spans, list), "Should return a list of spans"
assert len(spans) >= 1, "Should have at least one span from OpenAI setup"
# Verify span structure and data quality
first_span = spans[0]
required_attrs = ["span_id", "name", "trace_id"]
for attr in required_attrs:
assert hasattr(first_span, attr), f"Span should have {attr} attribute"
assert getattr(first_span, attr), f"Span {attr} should not be empty"
# Validate span data types and content
assert isinstance(first_span.span_id, str) and len(first_span.span_id) > 0, "span_id should be non-empty string"
assert isinstance(first_span.name, str) and len(first_span.name) > 0, "span name should be non-empty string"
assert isinstance(first_span.trace_id, str) and len(first_span.trace_id) > 0, "trace_id should be non-empty string"
# Verify span belongs to a valid trace
all_traces = llama_stack_client.telemetry.query_traces(limit=10)
trace_ids = {t.trace_id for t in all_traces}
if first_span.trace_id in trace_ids:
trace = llama_stack_client.telemetry.get_trace(trace_id=first_span.trace_id)
assert trace is not None, "Should be able to retrieve trace for valid trace_id"
assert trace.trace_id == first_span.trace_id, "Trace ID should match span's trace_id"
# Test with span filtering and validate results
filtered_spans = llama_stack_client.telemetry.query_spans(
attribute_filters=[{"key": "name", "op": "eq", "value": first_span.name}],
attributes_to_return=["name", "span_id"],
)
assert isinstance(filtered_spans, list), "Should return a list with span name filter"
# Validate filtered spans if filtering works
if len(filtered_spans) > 0:
for span in filtered_spans:
assert hasattr(span, "name"), "Filtered spans should have name attribute"
assert hasattr(span, "span_id"), "Filtered spans should have span_id attribute"
assert span.name == first_span.name, "Filtered spans should match the filter criteria"
assert isinstance(span.span_id, str) and len(span.span_id) > 0, "Filtered span_id should be valid"
# Test that all spans have consistent structure
for span in spans:
for attr in required_attrs:
            assert hasattr(span, attr) and getattr(span, attr), f"All spans should have non-empty {attr}"

def test_openai_completion_creates_telemetry(llama_stack_client, text_model_id):
"""Test that making OpenAI completion calls actually creates telemetry data."""
# Get initial trace count
initial_traces = llama_stack_client.telemetry.query_traces(limit=20)
initial_count = len(initial_traces)
# Make a new OpenAI completion call
response = llama_stack_client.chat.completions.create(
model=text_model_id,
messages=[{"role": "user", "content": "Test OpenAI telemetry creation"}],
stream=False,
)
# Verify we got a response
assert response is not None, "Should get a response from OpenAI completion"
assert hasattr(response, "choices"), "Response should have choices"
assert len(response.choices) > 0, "Response should have at least one choice"
# Wait for telemetry to be recorded
start_time = time.time()
while time.time() - start_time < 30:
final_traces = llama_stack_client.telemetry.query_traces(limit=20)
final_count = len(final_traces)
if final_count > initial_count:
break
time.sleep(0.1)
# Should have at least as many traces as before (might have more due to other activity)
assert final_count >= initial_count, "Should have at least as many traces after OpenAI call"


@@ -1,187 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import time
from datetime import UTC, datetime
from uuid import uuid4

import pytest
from llama_stack_client import Agent

@pytest.fixture(scope="module", autouse=True)
def setup_telemetry_data(llama_stack_client, text_model_id):
"""Setup fixture that creates telemetry data before tests run."""
agent = Agent(llama_stack_client, model=text_model_id, instructions="You are a helpful assistant")
session_id = agent.create_session(f"test-setup-session-{uuid4()}")
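    # Drive a couple of agent turns so that agent traces and spans are recorded.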
messages = [
"What is 2 + 2?",
"Tell me a short joke",
]
for msg in messages:
agent.create_turn(
messages=[{"role": "user", "content": msg}],
session_id=session_id,
stream=False,
)
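    # Create two plain chat completions as additional traces.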
for i in range(2):
llama_stack_client.chat.completions.create(
model=text_model_id, messages=[{"role": "user", "content": f"Test trace {i}"}]
)
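    # Poll for up to 30 seconds until the expected traces are visible via the telemetry API.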
start_time = time.time()
while time.time() - start_time < 30:
traces = llama_stack_client.telemetry.query_traces(limit=10)
if len(traces) >= 4:
break
time.sleep(0.1)
if len(traces) < 4:
pytest.fail(f"Failed to create sufficient telemetry data after 30s. Got {len(traces)} traces.")
    yield

def test_query_traces_basic(llama_stack_client):
"""Test basic trace querying functionality with proper data validation."""
all_traces = llama_stack_client.telemetry.query_traces(limit=5)
assert isinstance(all_traces, list), "Should return a list of traces"
assert len(all_traces) >= 4, "Should have at least 4 traces from setup"
# Verify trace structure and data quality
first_trace = all_traces[0]
assert hasattr(first_trace, "trace_id"), "Trace should have trace_id"
assert hasattr(first_trace, "start_time"), "Trace should have start_time"
assert hasattr(first_trace, "root_span_id"), "Trace should have root_span_id"
# Validate trace_id is a valid UUID format
assert isinstance(first_trace.trace_id, str) and len(first_trace.trace_id) > 0, (
"trace_id should be non-empty string"
)
# Validate start_time format and not in the future
now = datetime.now(UTC)
if isinstance(first_trace.start_time, str):
trace_time = datetime.fromisoformat(first_trace.start_time.replace("Z", "+00:00"))
else:
# start_time is already a datetime object
trace_time = first_trace.start_time
if trace_time.tzinfo is None:
trace_time = trace_time.replace(tzinfo=UTC)
# Ensure trace time is not in the future (but allow any age in the past for persistent test data)
time_diff = (now - trace_time).total_seconds()
assert time_diff >= 0, f"Trace start_time should not be in the future, got {time_diff}s"
# Validate root_span_id exists and is non-empty
assert isinstance(first_trace.root_span_id, str) and len(first_trace.root_span_id) > 0, (
"root_span_id should be non-empty string"
)
# Test querying specific trace by ID
specific_trace = llama_stack_client.telemetry.get_trace(trace_id=first_trace.trace_id)
assert specific_trace.trace_id == first_trace.trace_id, "Retrieved trace should match requested ID"
assert specific_trace.start_time == first_trace.start_time, "Retrieved trace should have same start_time"
assert specific_trace.root_span_id == first_trace.root_span_id, "Retrieved trace should have same root_span_id"
# Test pagination with proper validation
recent_traces = llama_stack_client.telemetry.query_traces(limit=3, offset=0)
assert len(recent_traces) <= 3, "Should return at most 3 traces when limit=3"
assert len(recent_traces) >= 1, "Should return at least 1 trace"
# Verify all traces have required fields
for trace in recent_traces:
assert hasattr(trace, "trace_id") and trace.trace_id, "Each trace should have non-empty trace_id"
assert hasattr(trace, "start_time") and trace.start_time, "Each trace should have non-empty start_time"
        assert hasattr(trace, "root_span_id") and trace.root_span_id, "Each trace should have non-empty root_span_id"

def test_query_spans_basic(llama_stack_client):
"""Test basic span querying functionality with proper validation."""
spans = llama_stack_client.telemetry.query_spans(attribute_filters=[], attributes_to_return=[])
assert isinstance(spans, list), "Should return a list of spans"
assert len(spans) >= 1, "Should have at least one span from setup"
# Verify span structure and data quality
first_span = spans[0]
required_attrs = ["span_id", "name", "trace_id"]
for attr in required_attrs:
assert hasattr(first_span, attr), f"Span should have {attr} attribute"
assert getattr(first_span, attr), f"Span {attr} should not be empty"
# Validate span data types and content
assert isinstance(first_span.span_id, str) and len(first_span.span_id) > 0, "span_id should be non-empty string"
assert isinstance(first_span.name, str) and len(first_span.name) > 0, "span name should be non-empty string"
assert isinstance(first_span.trace_id, str) and len(first_span.trace_id) > 0, "trace_id should be non-empty string"
# Verify span belongs to a valid trace (test with traces we know exist)
all_traces = llama_stack_client.telemetry.query_traces(limit=10)
trace_ids = {t.trace_id for t in all_traces}
if first_span.trace_id in trace_ids:
trace = llama_stack_client.telemetry.get_trace(trace_id=first_span.trace_id)
assert trace is not None, "Should be able to retrieve trace for valid trace_id"
assert trace.trace_id == first_span.trace_id, "Trace ID should match span's trace_id"
# Test with span filtering and validate results
filtered_spans = llama_stack_client.telemetry.query_spans(
attribute_filters=[{"key": "name", "op": "eq", "value": first_span.name}],
attributes_to_return=["name", "span_id"],
)
assert isinstance(filtered_spans, list), "Should return a list with span name filter"
# Validate filtered spans if filtering works
if len(filtered_spans) > 0:
for span in filtered_spans:
assert hasattr(span, "name"), "Filtered spans should have name attribute"
assert hasattr(span, "span_id"), "Filtered spans should have span_id attribute"
assert span.name == first_span.name, "Filtered spans should match the filter criteria"
assert isinstance(span.span_id, str) and len(span.span_id) > 0, "Filtered span_id should be valid"
# Test that all spans have consistent structure
for span in spans:
for attr in required_attrs:
            assert hasattr(span, attr) and getattr(span, attr), f"All spans should have non-empty {attr}"

def test_telemetry_pagination(llama_stack_client):
"""Test pagination in telemetry queries."""
# Get total count of traces
all_traces = llama_stack_client.telemetry.query_traces(limit=20)
total_count = len(all_traces)
assert total_count >= 4, "Should have at least 4 traces from setup"
# Test trace pagination
page1 = llama_stack_client.telemetry.query_traces(limit=2, offset=0)
page2 = llama_stack_client.telemetry.query_traces(limit=2, offset=2)
assert len(page1) == 2, "First page should have exactly 2 traces"
assert len(page2) >= 1, "Second page should have at least 1 trace"
# Verify no overlap between pages
page1_ids = {t.trace_id for t in page1}
page2_ids = {t.trace_id for t in page2}
assert len(page1_ids.intersection(page2_ids)) == 0, "Pages should contain different traces"
# Test ordering
ordered_traces = llama_stack_client.telemetry.query_traces(limit=5, order_by=["start_time"])
assert len(ordered_traces) >= 4, "Should have at least 4 traces for ordering test"
# Verify ordering by start_time
for i in range(len(ordered_traces) - 1):
current_time = ordered_traces[i].start_time
next_time = ordered_traces[i + 1].start_time
assert current_time <= next_time, f"Traces should be ordered by start_time: {current_time} > {next_time}"
# Test limit behavior
limited = llama_stack_client.telemetry.query_traces(limit=3)
assert len(limited) == 3, "Should return exactly 3 traces when limit=3"


@@ -1,206 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import time
from datetime import UTC, datetime, timedelta

import pytest

@pytest.fixture(scope="module", autouse=True)
def setup_telemetry_metrics_data(openai_client, client_with_models, text_model_id):
"""Setup fixture that creates telemetry metrics data before tests run."""
# Skip OpenAI tests if running in library mode
if not hasattr(client_with_models, "base_url"):
pytest.skip("OpenAI client tests not supported with library client")
prompt_tokens = []
completion_tokens = []
total_tokens = []
# Create OpenAI completions to generate metrics using the proper OpenAI client
for i in range(5):
response = openai_client.chat.completions.create(
model=text_model_id,
messages=[{"role": "user", "content": f"OpenAI test {i}"}],
stream=False,
)
prompt_tokens.append(response.usage.prompt_tokens)
completion_tokens.append(response.usage.completion_tokens)
total_tokens.append(response.usage.total_tokens)
# Wait for metrics to be logged
start_time = time.time()
while time.time() - start_time < 30:
try:
# Try to query metrics to see if they're available
metrics_response = client_with_models.telemetry.query_metrics(
metric_name="completion_tokens",
start_time=int((datetime.now(UTC) - timedelta(minutes=5)).timestamp()),
)
if len(metrics_response[0].values) > 0:
break
except Exception:
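            # Metrics may not be ingested yet; swallow errors and keep polling.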
pass
time.sleep(0.1)
# Return the token lists for use in tests
    return {"prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens, "total_tokens": total_tokens}

@pytest.mark.skip(reason="Skipping this test until client is regenerated")
def test_query_metrics_prompt_tokens(client_with_models, text_model_id, setup_telemetry_metrics_data):
"""Test that prompt_tokens metrics are queryable."""
start_time = int((datetime.now(UTC) - timedelta(minutes=10)).timestamp())
response = client_with_models.telemetry.query_metrics(
metric_name="prompt_tokens",
start_time=start_time,
)
assert isinstance(response, list)
    assert isinstance(response[0].values, list), "Metric should contain a list of values"
assert response[0].metric == "prompt_tokens"
# Use the actual values from setup instead of hardcoded values
expected_values = setup_telemetry_metrics_data["prompt_tokens"]
assert response[0].values[-1].value in expected_values, (
f"Expected one of {expected_values}, got {response[0].values[-1].value}"
    )

@pytest.mark.skip(reason="Skipping this test until client is regenerated")
def test_query_metrics_completion_tokens(client_with_models, text_model_id, setup_telemetry_metrics_data):
"""Test that completion_tokens metrics are queryable."""
start_time = int((datetime.now(UTC) - timedelta(minutes=10)).timestamp())
response = client_with_models.telemetry.query_metrics(
metric_name="completion_tokens",
start_time=start_time,
)
assert isinstance(response, list)
    assert isinstance(response[0].values, list), "Metric should contain a list of values"
assert response[0].metric == "completion_tokens"
# Use the actual values from setup instead of hardcoded values
expected_values = setup_telemetry_metrics_data["completion_tokens"]
assert response[0].values[-1].value in expected_values, (
f"Expected one of {expected_values}, got {response[0].values[-1].value}"
    )

@pytest.mark.skip(reason="Skipping this test until client is regenerated")
def test_query_metrics_total_tokens(client_with_models, text_model_id, setup_telemetry_metrics_data):
"""Test that total_tokens metrics are queryable."""
start_time = int((datetime.now(UTC) - timedelta(minutes=10)).timestamp())
response = client_with_models.telemetry.query_metrics(
metric_name="total_tokens",
start_time=start_time,
)
assert isinstance(response, list)
    assert isinstance(response[0].values, list), "Metric should contain a list of values"
assert response[0].metric == "total_tokens"
# Use the actual values from setup instead of hardcoded values
expected_values = setup_telemetry_metrics_data["total_tokens"]
assert response[0].values[-1].value in expected_values, (
f"Expected one of {expected_values}, got {response[0].values[-1].value}"
    )

@pytest.mark.skip(reason="Skipping this test until client is regenerated")
def test_query_metrics_with_time_range(llama_stack_client, text_model_id):
"""Test that metrics are queryable with time range."""
end_time = int(datetime.now(UTC).timestamp())
start_time = end_time - 600 # 10 minutes ago
response = llama_stack_client.telemetry.query_metrics(
metric_name="prompt_tokens",
start_time=start_time,
end_time=end_time,
)
assert isinstance(response, list)
    assert isinstance(response[0].values, list), "Metric should contain a list of values"
    assert response[0].metric == "prompt_tokens"

@pytest.mark.skip(reason="Skipping this test until client is regenerated")
def test_query_metrics_with_label_matchers(llama_stack_client, text_model_id):
"""Test that metrics are queryable with label matchers."""
start_time = int((datetime.now(UTC) - timedelta(minutes=10)).timestamp())
response = llama_stack_client.telemetry.query_metrics(
metric_name="prompt_tokens",
start_time=start_time,
label_matchers=[{"name": "model_id", "value": text_model_id, "operator": "="}],
)
    assert isinstance(response[0].values, list), "Metric should contain a list of values"

@pytest.mark.skip(reason="Skipping this test until client is regenerated")
def test_query_metrics_nonexistent_metric(llama_stack_client):
"""Test that querying a nonexistent metric returns empty data."""
start_time = int((datetime.now(UTC) - timedelta(minutes=10)).timestamp())
response = llama_stack_client.telemetry.query_metrics(
metric_name="nonexistent_metric",
start_time=start_time,
)
assert isinstance(response, list), "Should return an empty list for nonexistent metric"
    assert len(response) == 0

@pytest.mark.skip(reason="Skipping this test until client is regenerated")
def test_query_metrics_with_granularity(llama_stack_client, text_model_id):
"""Test that metrics are queryable with different granularity levels."""
start_time = int((datetime.now(UTC) - timedelta(minutes=10)).timestamp())
# Test hourly granularity
hourly_response = llama_stack_client.telemetry.query_metrics(
metric_name="total_tokens",
start_time=start_time,
granularity="1h",
)
# Test daily granularity
daily_response = llama_stack_client.telemetry.query_metrics(
metric_name="total_tokens",
start_time=start_time,
granularity="1d",
)
# Test no granularity (raw data points)
raw_response = llama_stack_client.telemetry.query_metrics(
metric_name="total_tokens",
start_time=start_time,
granularity=None,
)
# All should return valid data
assert isinstance(hourly_response[0].values, list), "Hourly granularity should return data"
assert isinstance(daily_response[0].values, list), "Daily granularity should return data"
assert isinstance(raw_response[0].values, list), "No granularity should return data"
# Verify that different granularities produce different aggregation levels
# (The exact number depends on data distribution, but they should be queryable)
assert len(hourly_response[0].values) >= 0, "Hourly granularity should be queryable"
assert len(daily_response[0].values) >= 0, "Daily granularity should be queryable"
assert len(raw_response[0].values) >= 0, "No granularity should be queryable"