mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-10-12 13:57:57 +00:00
chore!: remove ALL telemetry APIs
# What does this PR do?

## Test Plan

This commit is contained in:
parent 548ccff368
commit 8fc91f97dc

13 changed files with 1875 additions and 7293 deletions
@@ -1,57 +0,0 @@
```json
{
  "test_id": "tests/integration/telemetry/test_openai_telemetry.py::test_openai_completion_creates_telemetry[txt=ollama/llama3.2:3b-instruct-fp16]",
  "request": {
    "method": "POST",
    "url": "http://0.0.0.0:11434/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "messages": [
        {
          "role": "user",
          "content": "Test OpenAI telemetry creation"
        }
      ],
      "stream": false
    },
    "endpoint": "/v1/chat/completions",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": {
      "__type__": "openai.types.chat.chat_completion.ChatCompletion",
      "__data__": {
        "id": "rec-0de60cd6a6ec",
        "choices": [
          {
            "finish_reason": "stop",
            "index": 0,
            "logprobs": null,
            "message": {
              "content": "I'm happy to help you test this! This type of model is designed to provide information and answer questions, rather than engage in conversation or generate creative content. Feel free to ask me anything or test out any features you'd like. I'll do my best to assist you.\r\n\r\nIf you're looking for a way to engage with the model more creatively, there are several things you could try:\r\n\r\n1. Ask me a question on a topic unrelated to the ones I was trained on. This can give me a chance to demonstrate my ability to learn and generalize.\r\n2. Ask me to generate a piece of content - such as a short story or poem - on a specific topic. I can use patterns in the data I was trained on to generate text that is similar in style and structure to what you might find in a published book or article.\r\n3. Play a game with me. We could play games like \"20 Questions\" (where you think of an object, and I try to guess what it is by asking yes-or-no questions), or \"Hangman\". Let me know if there's something else you'd like to do!",
              "refusal": null,
              "role": "assistant",
              "annotations": null,
              "audio": null,
              "function_call": null,
              "tool_calls": null
            }
          }
        ],
        "created": 0,
        "model": "llama3.2:3b-instruct-fp16",
        "object": "chat.completion",
        "service_tier": null,
        "system_fingerprint": "fp_ollama",
        "usage": {
          "completion_tokens": 229,
          "prompt_tokens": 30,
          "total_tokens": 259,
          "completion_tokens_details": null,
          "prompt_tokens_details": null
        }
      }
    },
    "is_streaming": false
  }
}
```
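The recording above pairs a request fingerprint with a canned ChatCompletion so the test can run without a live Ollama server. A minimal sketch of how such a fixture could be replayed, assuming a JSON file on disk and exact-match keying on method, endpoint, and body; the helper names are hypothetical, not llama-stack's actual replay machinery:

```python
# Illustrative only: a minimal replay stub for a recording like the one above.
import json


def load_recording(path: str) -> dict:
    """Load a recorded request/response pair from a JSON fixture."""
    with open(path) as f:
        return json.load(f)


def replay(recording: dict, method: str, endpoint: str, body: dict) -> dict:
    """Return the canned response payload if the incoming request matches the recording."""
    req = recording["request"]
    if (method, endpoint, body) != (req["method"], req["endpoint"], req["body"]):
        raise KeyError(f"no recording for {method} {endpoint}")
    return recording["response"]["body"]["__data__"]


# Usage sketch (path is an assumption):
# rec = load_recording("recordings/rec-0de60cd6a6ec.json")
# data = replay(rec, "POST", "/v1/chat/completions", rec["request"]["body"])
# assert data["usage"]["total_tokens"] == 259
```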
@@ -1,194 +0,0 @@
```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import time
from datetime import UTC, datetime

import pytest


@pytest.fixture(scope="module", autouse=True)
def setup_openai_telemetry_data(llama_stack_client, text_model_id):
    """Setup fixture that creates telemetry data specifically for OpenAI completions testing."""

    # Create OpenAI completion traces
    for i in range(3):
        llama_stack_client.chat.completions.create(
            model=text_model_id,
            messages=[
                {
                    "role": "user",
                    "content": f"Test trace openai {i}",
                }
            ],
            # stream=False to always capture metrics.
            stream=False,
        )

    # Create additional OpenAI completion traces with different parameters
    for i in range(2):
        llama_stack_client.chat.completions.create(
            model=text_model_id,
            messages=[
                {
                    "role": "user",
                    "content": f"Test trace openai with temperature {i}",
                }
            ],
            temperature=0.7,
            max_tokens=100,
            stream=False,
        )

    start_time = time.time()

    while time.time() - start_time < 30:
        traces = llama_stack_client.telemetry.query_traces(limit=10)
        if len(traces) >= 5:  # 5 OpenAI completion traces
            break
        time.sleep(0.1)

    if len(traces) < 5:
        pytest.fail(
            f"Failed to create sufficient OpenAI completion telemetry data after 30s. Got {len(traces)} traces."
        )

    yield


def test_openai_traces_basic(llama_stack_client):
    """Test basic trace querying functionality for OpenAI completions."""
    all_traces = llama_stack_client.telemetry.query_traces(limit=10)

    assert isinstance(all_traces, list), "Should return a list of traces"
    assert len(all_traces) >= 5, "Should have at least 5 traces from OpenAI setup"

    # Verify trace structure and data quality
    first_trace = all_traces[0]
    assert hasattr(first_trace, "trace_id"), "Trace should have trace_id"
    assert hasattr(first_trace, "start_time"), "Trace should have start_time"
    assert hasattr(first_trace, "root_span_id"), "Trace should have root_span_id"

    # Validate trace_id is a non-empty string
    assert isinstance(first_trace.trace_id, str) and len(first_trace.trace_id) > 0, (
        "trace_id should be non-empty string"
    )

    # Validate start_time format and ensure it is not in the future
    now = datetime.now(UTC)
    if isinstance(first_trace.start_time, str):
        trace_time = datetime.fromisoformat(first_trace.start_time.replace("Z", "+00:00"))
    else:
        # start_time is already a datetime object
        trace_time = first_trace.start_time
        if trace_time.tzinfo is None:
            trace_time = trace_time.replace(tzinfo=UTC)

    # Ensure trace time is not in the future
    time_diff = (now - trace_time).total_seconds()
    assert time_diff >= 0, f"Trace start_time should not be in the future, got {time_diff}s"

    # Validate root_span_id exists and is non-empty
    assert isinstance(first_trace.root_span_id, str) and len(first_trace.root_span_id) > 0, (
        "root_span_id should be non-empty string"
    )

    # Test querying specific trace by ID
    specific_trace = llama_stack_client.telemetry.get_trace(trace_id=first_trace.trace_id)
    assert specific_trace.trace_id == first_trace.trace_id, "Retrieved trace should match requested ID"
    assert specific_trace.start_time == first_trace.start_time, "Retrieved trace should have same start_time"
    assert specific_trace.root_span_id == first_trace.root_span_id, "Retrieved trace should have same root_span_id"

    # Test pagination with proper validation
    recent_traces = llama_stack_client.telemetry.query_traces(limit=3, offset=0)
    assert len(recent_traces) <= 3, "Should return at most 3 traces when limit=3"
    assert len(recent_traces) >= 1, "Should return at least 1 trace"

    # Verify all traces have required fields
    for trace in recent_traces:
        assert hasattr(trace, "trace_id") and trace.trace_id, "Each trace should have non-empty trace_id"
        assert hasattr(trace, "start_time") and trace.start_time, "Each trace should have non-empty start_time"
        assert hasattr(trace, "root_span_id") and trace.root_span_id, "Each trace should have non-empty root_span_id"


def test_openai_spans_basic(llama_stack_client):
    """Test basic span querying functionality for OpenAI completions."""
    spans = llama_stack_client.telemetry.query_spans(attribute_filters=[], attributes_to_return=[])

    assert isinstance(spans, list), "Should return a list of spans"
    assert len(spans) >= 1, "Should have at least one span from OpenAI setup"

    # Verify span structure and data quality
    first_span = spans[0]
    required_attrs = ["span_id", "name", "trace_id"]
    for attr in required_attrs:
        assert hasattr(first_span, attr), f"Span should have {attr} attribute"
        assert getattr(first_span, attr), f"Span {attr} should not be empty"

    # Validate span data types and content
    assert isinstance(first_span.span_id, str) and len(first_span.span_id) > 0, "span_id should be non-empty string"
    assert isinstance(first_span.name, str) and len(first_span.name) > 0, "span name should be non-empty string"
    assert isinstance(first_span.trace_id, str) and len(first_span.trace_id) > 0, "trace_id should be non-empty string"

    # Verify span belongs to a valid trace
    all_traces = llama_stack_client.telemetry.query_traces(limit=10)
    trace_ids = {t.trace_id for t in all_traces}
    if first_span.trace_id in trace_ids:
        trace = llama_stack_client.telemetry.get_trace(trace_id=first_span.trace_id)
        assert trace is not None, "Should be able to retrieve trace for valid trace_id"
        assert trace.trace_id == first_span.trace_id, "Trace ID should match span's trace_id"

    # Test with span filtering and validate results
    filtered_spans = llama_stack_client.telemetry.query_spans(
        attribute_filters=[{"key": "name", "op": "eq", "value": first_span.name}],
        attributes_to_return=["name", "span_id"],
    )
    assert isinstance(filtered_spans, list), "Should return a list with span name filter"

    # Validate filtered spans if filtering works
    if len(filtered_spans) > 0:
        for span in filtered_spans:
            assert hasattr(span, "name"), "Filtered spans should have name attribute"
            assert hasattr(span, "span_id"), "Filtered spans should have span_id attribute"
            assert span.name == first_span.name, "Filtered spans should match the filter criteria"
            assert isinstance(span.span_id, str) and len(span.span_id) > 0, "Filtered span_id should be valid"

    # Test that all spans have consistent structure
    for span in spans:
        for attr in required_attrs:
            assert hasattr(span, attr) and getattr(span, attr), f"All spans should have non-empty {attr}"


def test_openai_completion_creates_telemetry(llama_stack_client, text_model_id):
    """Test that making OpenAI completion calls actually creates telemetry data."""

    # Get initial trace count
    initial_traces = llama_stack_client.telemetry.query_traces(limit=20)
    initial_count = len(initial_traces)

    # Make a new OpenAI completion call
    response = llama_stack_client.chat.completions.create(
        model=text_model_id,
        messages=[{"role": "user", "content": "Test OpenAI telemetry creation"}],
        stream=False,
    )

    # Verify we got a response
    assert response is not None, "Should get a response from OpenAI completion"
    assert hasattr(response, "choices"), "Response should have choices"
    assert len(response.choices) > 0, "Response should have at least one choice"

    # Wait for telemetry to be recorded
    start_time = time.time()
    while time.time() - start_time < 30:
        final_traces = llama_stack_client.telemetry.query_traces(limit=20)
        final_count = len(final_traces)
        if final_count > initial_count:
            break
        time.sleep(0.1)

    # Should have at least as many traces as before (might have more due to other activity)
    assert final_count >= initial_count, "Should have at least as many traces after OpenAI call"
```
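The 30-second wait loop above appears in both the fixture and the final test. A minimal sketch of a shared poll-until-true helper that could replace it; `wait_for` is a hypothetical name, not a llama-stack API:

```python
# Illustrative only: a generic polling helper for the repeated wait loops above.
import time
from collections.abc import Callable


def wait_for(predicate: Callable[[], bool], timeout: float = 30.0, interval: float = 0.1) -> bool:
    """Poll `predicate` until it returns True or `timeout` seconds elapse."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        if predicate():
            return True
        time.sleep(interval)
    return False


# Usage sketch against the fixture above:
# ok = wait_for(lambda: len(llama_stack_client.telemetry.query_traces(limit=10)) >= 5)
# if not ok:
#     pytest.fail("Failed to create sufficient OpenAI completion telemetry data after 30s.")
```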
@@ -1,187 +0,0 @@
```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import time
from datetime import UTC, datetime
from uuid import uuid4

import pytest
from llama_stack_client import Agent


@pytest.fixture(scope="module", autouse=True)
def setup_telemetry_data(llama_stack_client, text_model_id):
    """Setup fixture that creates telemetry data before tests run."""
    agent = Agent(llama_stack_client, model=text_model_id, instructions="You are a helpful assistant")

    session_id = agent.create_session(f"test-setup-session-{uuid4()}")

    messages = [
        "What is 2 + 2?",
        "Tell me a short joke",
    ]

    for msg in messages:
        agent.create_turn(
            messages=[{"role": "user", "content": msg}],
            session_id=session_id,
            stream=False,
        )

    for i in range(2):
        llama_stack_client.chat.completions.create(
            model=text_model_id, messages=[{"role": "user", "content": f"Test trace {i}"}]
        )

    start_time = time.time()

    while time.time() - start_time < 30:
        traces = llama_stack_client.telemetry.query_traces(limit=10)
        if len(traces) >= 4:
            break
        time.sleep(0.1)

    if len(traces) < 4:
        pytest.fail(f"Failed to create sufficient telemetry data after 30s. Got {len(traces)} traces.")

    yield


def test_query_traces_basic(llama_stack_client):
    """Test basic trace querying functionality with proper data validation."""
    all_traces = llama_stack_client.telemetry.query_traces(limit=5)

    assert isinstance(all_traces, list), "Should return a list of traces"
    assert len(all_traces) >= 4, "Should have at least 4 traces from setup"

    # Verify trace structure and data quality
    first_trace = all_traces[0]
    assert hasattr(first_trace, "trace_id"), "Trace should have trace_id"
    assert hasattr(first_trace, "start_time"), "Trace should have start_time"
    assert hasattr(first_trace, "root_span_id"), "Trace should have root_span_id"

    # Validate trace_id is a non-empty string
    assert isinstance(first_trace.trace_id, str) and len(first_trace.trace_id) > 0, (
        "trace_id should be non-empty string"
    )

    # Validate start_time format and ensure it is not in the future
    now = datetime.now(UTC)
    if isinstance(first_trace.start_time, str):
        trace_time = datetime.fromisoformat(first_trace.start_time.replace("Z", "+00:00"))
    else:
        # start_time is already a datetime object
        trace_time = first_trace.start_time
        if trace_time.tzinfo is None:
            trace_time = trace_time.replace(tzinfo=UTC)

    # Ensure trace time is not in the future (but allow any age in the past for persistent test data)
    time_diff = (now - trace_time).total_seconds()
    assert time_diff >= 0, f"Trace start_time should not be in the future, got {time_diff}s"

    # Validate root_span_id exists and is non-empty
    assert isinstance(first_trace.root_span_id, str) and len(first_trace.root_span_id) > 0, (
        "root_span_id should be non-empty string"
    )

    # Test querying specific trace by ID
    specific_trace = llama_stack_client.telemetry.get_trace(trace_id=first_trace.trace_id)
    assert specific_trace.trace_id == first_trace.trace_id, "Retrieved trace should match requested ID"
    assert specific_trace.start_time == first_trace.start_time, "Retrieved trace should have same start_time"
    assert specific_trace.root_span_id == first_trace.root_span_id, "Retrieved trace should have same root_span_id"

    # Test pagination with proper validation
    recent_traces = llama_stack_client.telemetry.query_traces(limit=3, offset=0)
    assert len(recent_traces) <= 3, "Should return at most 3 traces when limit=3"
    assert len(recent_traces) >= 1, "Should return at least 1 trace"

    # Verify all traces have required fields
    for trace in recent_traces:
        assert hasattr(trace, "trace_id") and trace.trace_id, "Each trace should have non-empty trace_id"
        assert hasattr(trace, "start_time") and trace.start_time, "Each trace should have non-empty start_time"
        assert hasattr(trace, "root_span_id") and trace.root_span_id, "Each trace should have non-empty root_span_id"


def test_query_spans_basic(llama_stack_client):
    """Test basic span querying functionality with proper validation."""
    spans = llama_stack_client.telemetry.query_spans(attribute_filters=[], attributes_to_return=[])

    assert isinstance(spans, list), "Should return a list of spans"
    assert len(spans) >= 1, "Should have at least one span from setup"

    # Verify span structure and data quality
    first_span = spans[0]
    required_attrs = ["span_id", "name", "trace_id"]
    for attr in required_attrs:
        assert hasattr(first_span, attr), f"Span should have {attr} attribute"
        assert getattr(first_span, attr), f"Span {attr} should not be empty"

    # Validate span data types and content
    assert isinstance(first_span.span_id, str) and len(first_span.span_id) > 0, "span_id should be non-empty string"
    assert isinstance(first_span.name, str) and len(first_span.name) > 0, "span name should be non-empty string"
    assert isinstance(first_span.trace_id, str) and len(first_span.trace_id) > 0, "trace_id should be non-empty string"

    # Verify span belongs to a valid trace (test with traces we know exist)
    all_traces = llama_stack_client.telemetry.query_traces(limit=10)
    trace_ids = {t.trace_id for t in all_traces}
    if first_span.trace_id in trace_ids:
        trace = llama_stack_client.telemetry.get_trace(trace_id=first_span.trace_id)
        assert trace is not None, "Should be able to retrieve trace for valid trace_id"
        assert trace.trace_id == first_span.trace_id, "Trace ID should match span's trace_id"

    # Test with span filtering and validate results
    filtered_spans = llama_stack_client.telemetry.query_spans(
        attribute_filters=[{"key": "name", "op": "eq", "value": first_span.name}],
        attributes_to_return=["name", "span_id"],
    )
    assert isinstance(filtered_spans, list), "Should return a list with span name filter"

    # Validate filtered spans if filtering works
    if len(filtered_spans) > 0:
        for span in filtered_spans:
            assert hasattr(span, "name"), "Filtered spans should have name attribute"
            assert hasattr(span, "span_id"), "Filtered spans should have span_id attribute"
            assert span.name == first_span.name, "Filtered spans should match the filter criteria"
            assert isinstance(span.span_id, str) and len(span.span_id) > 0, "Filtered span_id should be valid"

    # Test that all spans have consistent structure
    for span in spans:
        for attr in required_attrs:
            assert hasattr(span, attr) and getattr(span, attr), f"All spans should have non-empty {attr}"


def test_telemetry_pagination(llama_stack_client):
    """Test pagination in telemetry queries."""
    # Get total count of traces
    all_traces = llama_stack_client.telemetry.query_traces(limit=20)
    total_count = len(all_traces)
    assert total_count >= 4, "Should have at least 4 traces from setup"

    # Test trace pagination
    page1 = llama_stack_client.telemetry.query_traces(limit=2, offset=0)
    page2 = llama_stack_client.telemetry.query_traces(limit=2, offset=2)

    assert len(page1) == 2, "First page should have exactly 2 traces"
    assert len(page2) >= 1, "Second page should have at least 1 trace"

    # Verify no overlap between pages
    page1_ids = {t.trace_id for t in page1}
    page2_ids = {t.trace_id for t in page2}
    assert len(page1_ids.intersection(page2_ids)) == 0, "Pages should contain different traces"

    # Test ordering
    ordered_traces = llama_stack_client.telemetry.query_traces(limit=5, order_by=["start_time"])
    assert len(ordered_traces) >= 4, "Should have at least 4 traces for ordering test"

    # Verify ordering by start_time
    for i in range(len(ordered_traces) - 1):
        current_time = ordered_traces[i].start_time
        next_time = ordered_traces[i + 1].start_time
        assert current_time <= next_time, f"Traces should be ordered by start_time: {current_time} > {next_time}"

    # Test limit behavior
    limited = llama_stack_client.telemetry.query_traces(limit=3)
    assert len(limited) == 3, "Should return exactly 3 traces when limit=3"
```
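`test_telemetry_pagination` above exercises limit/offset paging by hand. A minimal sketch of how the same parameters could drive a paging iterator, assuming a short page signals the end of the result set; `iter_traces` is a hypothetical helper, not part of the client:

```python
# Illustrative only: page through traces with the limit/offset parameters
# exercised by test_telemetry_pagination above.
def iter_traces(client, page_size: int = 2):
    """Yield traces one page at a time until a short page signals the end."""
    offset = 0
    while True:
        page = client.telemetry.query_traces(limit=page_size, offset=offset)
        yield from page
        if len(page) < page_size:
            break
        offset += page_size


# Usage sketch:
# all_ids = [t.trace_id for t in iter_traces(llama_stack_client)]
# assert len(all_ids) == len(set(all_ids)), "pages should not overlap"
```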
@@ -1,206 +0,0 @@
```python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import time
from datetime import UTC, datetime, timedelta

import pytest


@pytest.fixture(scope="module", autouse=True)
def setup_telemetry_metrics_data(openai_client, client_with_models, text_model_id):
    """Setup fixture that creates telemetry metrics data before tests run."""

    # Skip OpenAI tests if running in library mode
    if not hasattr(client_with_models, "base_url"):
        pytest.skip("OpenAI client tests not supported with library client")

    prompt_tokens = []
    completion_tokens = []
    total_tokens = []

    # Create OpenAI completions to generate metrics using the proper OpenAI client
    for i in range(5):
        response = openai_client.chat.completions.create(
            model=text_model_id,
            messages=[{"role": "user", "content": f"OpenAI test {i}"}],
            stream=False,
        )
        prompt_tokens.append(response.usage.prompt_tokens)
        completion_tokens.append(response.usage.completion_tokens)
        total_tokens.append(response.usage.total_tokens)

    # Wait for metrics to be logged
    start_time = time.time()
    while time.time() - start_time < 30:
        try:
            # Try to query metrics to see if they're available
            metrics_response = client_with_models.telemetry.query_metrics(
                metric_name="completion_tokens",
                start_time=int((datetime.now(UTC) - timedelta(minutes=5)).timestamp()),
            )
            if len(metrics_response[0].values) > 0:
                break
        except Exception:
            pass
        time.sleep(0.1)

    # Return the token lists for use in tests
    return {"prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens, "total_tokens": total_tokens}


@pytest.mark.skip(reason="Skipping this test until client is regenerated")
def test_query_metrics_prompt_tokens(client_with_models, text_model_id, setup_telemetry_metrics_data):
    """Test that prompt_tokens metrics are queryable."""
    start_time = int((datetime.now(UTC) - timedelta(minutes=10)).timestamp())

    response = client_with_models.telemetry.query_metrics(
        metric_name="prompt_tokens",
        start_time=start_time,
    )

    assert isinstance(response, list)
    assert isinstance(response[0].values, list), "Should return a list of metric series"
    assert response[0].metric == "prompt_tokens"

    # Use the actual values from setup instead of hardcoded values
    expected_values = setup_telemetry_metrics_data["prompt_tokens"]
    assert response[0].values[-1].value in expected_values, (
        f"Expected one of {expected_values}, got {response[0].values[-1].value}"
    )


@pytest.mark.skip(reason="Skipping this test until client is regenerated")
def test_query_metrics_completion_tokens(client_with_models, text_model_id, setup_telemetry_metrics_data):
    """Test that completion_tokens metrics are queryable."""
    start_time = int((datetime.now(UTC) - timedelta(minutes=10)).timestamp())

    response = client_with_models.telemetry.query_metrics(
        metric_name="completion_tokens",
        start_time=start_time,
    )

    assert isinstance(response, list)
    assert isinstance(response[0].values, list), "Should return a list of metric series"
    assert response[0].metric == "completion_tokens"

    # Use the actual values from setup instead of hardcoded values
    expected_values = setup_telemetry_metrics_data["completion_tokens"]
    assert response[0].values[-1].value in expected_values, (
        f"Expected one of {expected_values}, got {response[0].values[-1].value}"
    )


@pytest.mark.skip(reason="Skipping this test until client is regenerated")
def test_query_metrics_total_tokens(client_with_models, text_model_id, setup_telemetry_metrics_data):
    """Test that total_tokens metrics are queryable."""
    start_time = int((datetime.now(UTC) - timedelta(minutes=10)).timestamp())

    response = client_with_models.telemetry.query_metrics(
        metric_name="total_tokens",
        start_time=start_time,
    )

    assert isinstance(response, list)
    assert isinstance(response[0].values, list), "Should return a list of metric series"
    assert response[0].metric == "total_tokens"

    # Use the actual values from setup instead of hardcoded values
    expected_values = setup_telemetry_metrics_data["total_tokens"]
    assert response[0].values[-1].value in expected_values, (
        f"Expected one of {expected_values}, got {response[0].values[-1].value}"
    )


@pytest.mark.skip(reason="Skipping this test until client is regenerated")
def test_query_metrics_with_time_range(llama_stack_client, text_model_id):
    """Test that metrics are queryable with a time range."""
    end_time = int(datetime.now(UTC).timestamp())
    start_time = end_time - 600  # 10 minutes ago

    response = llama_stack_client.telemetry.query_metrics(
        metric_name="prompt_tokens",
        start_time=start_time,
        end_time=end_time,
    )

    assert isinstance(response, list)
    assert isinstance(response[0].values, list), "Should return a list of metric series"
    assert response[0].metric == "prompt_tokens"


@pytest.mark.skip(reason="Skipping this test until client is regenerated")
def test_query_metrics_with_label_matchers(llama_stack_client, text_model_id):
    """Test that metrics are queryable with label matchers."""
    start_time = int((datetime.now(UTC) - timedelta(minutes=10)).timestamp())

    response = llama_stack_client.telemetry.query_metrics(
        metric_name="prompt_tokens",
        start_time=start_time,
        label_matchers=[{"name": "model_id", "value": text_model_id, "operator": "="}],
    )

    assert isinstance(response[0].values, list), "Should return a list of metric series"


@pytest.mark.skip(reason="Skipping this test until client is regenerated")
def test_query_metrics_nonexistent_metric(llama_stack_client):
    """Test that querying a nonexistent metric returns empty data."""
    start_time = int((datetime.now(UTC) - timedelta(minutes=10)).timestamp())

    response = llama_stack_client.telemetry.query_metrics(
        metric_name="nonexistent_metric",
        start_time=start_time,
    )

    assert isinstance(response, list), "Should return an empty list for nonexistent metric"
    assert len(response) == 0


@pytest.mark.skip(reason="Skipping this test until client is regenerated")
def test_query_metrics_with_granularity(llama_stack_client, text_model_id):
    """Test that metrics are queryable with different granularity levels."""
    start_time = int((datetime.now(UTC) - timedelta(minutes=10)).timestamp())

    # Test hourly granularity
    hourly_response = llama_stack_client.telemetry.query_metrics(
        metric_name="total_tokens",
        start_time=start_time,
        granularity="1h",
    )

    # Test daily granularity
    daily_response = llama_stack_client.telemetry.query_metrics(
        metric_name="total_tokens",
        start_time=start_time,
        granularity="1d",
    )

    # Test no granularity (raw data points)
    raw_response = llama_stack_client.telemetry.query_metrics(
        metric_name="total_tokens",
        start_time=start_time,
        granularity=None,
    )

    # All should return valid data
    assert isinstance(hourly_response[0].values, list), "Hourly granularity should return data"
    assert isinstance(daily_response[0].values, list), "Daily granularity should return data"
    assert isinstance(raw_response[0].values, list), "No granularity should return data"

    # Verify that different granularities produce different aggregation levels
    # (The exact number depends on data distribution, but they should be queryable)
    assert len(hourly_response[0].values) >= 0, "Hourly granularity should be queryable"
    assert len(daily_response[0].values) >= 0, "Daily granularity should be queryable"
    assert len(raw_response[0].values) >= 0, "No granularity should be queryable"
```
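The fixture above captures prompt, completion, and total token counts per call. A minimal sketch of a consistency check over that captured data, relying only on the OpenAI usage convention that prompt_tokens + completion_tokens == total_tokens; the helper name is hypothetical:

```python
# Illustrative only: verify token accounting over the lists returned by
# setup_telemetry_metrics_data above.
def check_token_accounting(data: dict[str, list[int]]) -> None:
    """Assert that each recorded completion's token counts add up."""
    for p, c, t in zip(data["prompt_tokens"], data["completion_tokens"], data["total_tokens"], strict=True):
        assert p + c == t, f"usage mismatch: {p} + {c} != {t}"


# Usage sketch inside a test:
# def test_token_accounting(setup_telemetry_metrics_data):
#     check_token_accounting(setup_telemetry_metrics_data)
```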