Merge remote-tracking branch 'origin/main' into stores

This commit is contained in:
Ashwin Bharambe 2025-10-20 10:49:06 -07:00
commit 490b212576
89 changed files with 19353 additions and 8323 deletions

View file

@ -70,10 +70,15 @@ class BatchHelper:
):
"""Wait for a batch to reach a terminal status.
Uses exponential backoff polling strategy for efficient waiting:
- Starts with short intervals (0.1s) for fast batches (e.g., replay mode)
- Doubles interval each iteration up to a maximum
- Adapts automatically to both fast and slow batch processing
Args:
batch_id: The batch ID to monitor
max_wait_time: Maximum time to wait in seconds (default: 60 seconds)
sleep_interval: Time to sleep between checks in seconds (default: 1/10th of max_wait_time, min 1s, max 15s)
sleep_interval: If provided, uses fixed interval instead of exponential backoff
expected_statuses: Set of expected terminal statuses (default: {"completed"})
timeout_action: Action on timeout - "fail" (pytest.fail) or "skip" (pytest.skip)
@ -84,10 +89,6 @@ class BatchHelper:
pytest.Failed: If batch reaches an unexpected status or timeout_action is "fail"
pytest.Skipped: If timeout_action is "skip" on timeout or unexpected status
"""
if sleep_interval is None:
# Default to 1/10th of max_wait_time, with min 1s and max 15s
sleep_interval = max(1, min(15, max_wait_time // 10))
if expected_statuses is None:
expected_statuses = {"completed"}
@ -95,6 +96,15 @@ class BatchHelper:
unexpected_statuses = terminal_statuses - expected_statuses
start_time = time.time()
# Use exponential backoff if no explicit sleep_interval provided
if sleep_interval is None:
current_interval = 0.1 # Start with 100ms
max_interval = 10.0 # Cap at 10 seconds
else:
current_interval = sleep_interval
max_interval = sleep_interval
while time.time() - start_time < max_wait_time:
current_batch = self.client.batches.retrieve(batch_id)
@ -107,7 +117,11 @@ class BatchHelper:
else:
pytest.fail(error_msg)
time.sleep(sleep_interval)
time.sleep(current_interval)
# Exponential backoff: double the interval each time, up to max
if sleep_interval is None:
current_interval = min(current_interval * 2, max_interval)
timeout_msg = f"Batch did not reach expected status {expected_statuses} within {max_wait_time} seconds"
if timeout_action == "skip":

View file

@ -0,0 +1,506 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant"
},
{
"role": "user",
"content": "What is 2 + 2?"
},
{
"role": "assistant",
"content": "The answer to the equation 2 + 2 is 4."
},
{
"role": "user",
"content": "Tell me a short joke"
}
],
"max_tokens": 0,
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": "Why",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " did",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " scare",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": "crow",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " win",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " an",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " award",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": "?\n\n",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": "Because",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " he",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " was",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " outstanding",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " in",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " his",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": " field",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": "!",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-ab1a32474062",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
}
],
"is_streaming": true
}
}

View file

@ -0,0 +1,88 @@
{
"test_id": null,
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:3b-instruct-fp16",
"created": 1760453641,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "qwen3:4b",
"created": 1757615302,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "gpt-oss:latest",
"created": 1756395223,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "nomic-embed-text:latest",
"created": 1756318548,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:3b",
"created": 1755191039,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "all-minilm:l6-v2",
"created": 1753968177,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:1b",
"created": 1746124735,
"object": "model",
"owned_by": "library"
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "llama3.2:latest",
"created": 1746044170,
"object": "model",
"owned_by": "library"
}
}
],
"is_streaming": false
}
}

View file

@ -42,7 +42,9 @@ def pytest_sessionstart(session):
# Set test stack config type for api_recorder test isolation
stack_config = session.config.getoption("--stack-config", default=None)
if stack_config and (stack_config.startswith("server:") or stack_config.startswith("http")):
if stack_config and (
stack_config.startswith("server:") or stack_config.startswith("docker:") or stack_config.startswith("http")
):
os.environ["LLAMA_STACK_TEST_STACK_CONFIG_TYPE"] = "server"
logger.info(f"Test stack config type: server (stack_config={stack_config})")
else:
@ -139,7 +141,9 @@ def pytest_addoption(parser):
a 'pointer' to the stack. this can be either be:
(a) a template name like `starter`, or
(b) a path to a run.yaml file, or
(c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`
(c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`, or
(d) a server config like `server:ci-tests`, or
(e) a docker config like `docker:ci-tests` (builds and runs container)
"""
),
)

View file

@ -0,0 +1,95 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""Telemetry test configuration using OpenTelemetry SDK exporters.
This conftest provides in-memory telemetry collection for library_client mode only.
Tests using these fixtures should skip in server mode since the in-memory collector
cannot access spans from a separate server process.
"""
from typing import Any
import opentelemetry.metrics as otel_metrics
import opentelemetry.trace as otel_trace
import pytest
from opentelemetry import metrics, trace
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import InMemoryMetricReader
from opentelemetry.sdk.trace import ReadableSpan, TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
import llama_stack.providers.inline.telemetry.meta_reference.telemetry as telemetry_module
from llama_stack.testing.api_recorder import patch_httpx_for_test_id
from tests.integration.fixtures.common import instantiate_llama_stack_client
class TestCollector:
def __init__(self, span_exp, metric_read):
assert span_exp and metric_read
self.span_exporter = span_exp
self.metric_reader = metric_read
def get_spans(self) -> tuple[ReadableSpan, ...]:
return self.span_exporter.get_finished_spans()
def get_metrics(self) -> Any | None:
metrics = self.metric_reader.get_metrics_data()
if metrics and metrics.resource_metrics:
return metrics.resource_metrics[0].scope_metrics[0].metrics
return None
def clear(self) -> None:
self.span_exporter.clear()
self.metric_reader.get_metrics_data()
@pytest.fixture(scope="session")
def _telemetry_providers():
"""Set up in-memory OTEL providers before llama_stack_client initializes."""
# Reset set-once flags to allow re-initialization
if hasattr(otel_trace, "_TRACER_PROVIDER_SET_ONCE"):
otel_trace._TRACER_PROVIDER_SET_ONCE._done = False # type: ignore
if hasattr(otel_metrics, "_METER_PROVIDER_SET_ONCE"):
otel_metrics._METER_PROVIDER_SET_ONCE._done = False # type: ignore
# Create in-memory exporters/readers
span_exporter = InMemorySpanExporter()
tracer_provider = TracerProvider()
tracer_provider.add_span_processor(SimpleSpanProcessor(span_exporter))
trace.set_tracer_provider(tracer_provider)
metric_reader = InMemoryMetricReader()
meter_provider = MeterProvider(metric_readers=[metric_reader])
metrics.set_meter_provider(meter_provider)
# Set module-level provider so TelemetryAdapter uses our in-memory providers
telemetry_module._TRACER_PROVIDER = tracer_provider
yield (span_exporter, metric_reader, tracer_provider, meter_provider)
telemetry_module._TRACER_PROVIDER = None
tracer_provider.shutdown()
meter_provider.shutdown()
@pytest.fixture(scope="session")
def llama_stack_client(_telemetry_providers, request):
"""Override llama_stack_client to ensure in-memory telemetry providers are used."""
patch_httpx_for_test_id()
client = instantiate_llama_stack_client(request.session)
return client
@pytest.fixture
def mock_otlp_collector(_telemetry_providers):
"""Provides access to telemetry data and clears between tests."""
span_exporter, metric_reader, _, _ = _telemetry_providers
collector = TestCollector(span_exporter, metric_reader)
yield collector
collector.clear()

View file

@ -0,0 +1,57 @@
{
"test_id": "tests/integration/telemetry/test_openai_telemetry.py::test_openai_completion_creates_telemetry[txt=ollama/llama3.2:3b-instruct-fp16]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Test OpenAI telemetry creation"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-0de60cd6a6ec",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "I'm happy to help you with setting up and testing OpenAI's telemetry creation.\n\nOpenAI provides a feature called \"Telemetry\" which allows developers to collect data about their users' interactions with the model. To test this feature, we need to create a simple application that uses the OpenAI API and sends telemetry data to their servers.\n\nHere's an example code in Python that demonstrates how to create a simple telemetry creator:\n\n```python\nimport os\nfrom openai.api import API\n\n# Initialize the OpenAI API client\napi = API(os.environ['OPENAI_API_KEY'])\n\ndef create_user():\n # Create a new user entity\n user_entity = {\n 'id': 'user-123',\n 'name': 'John Doe',\n 'email': 'john.doe@example.com'\n }\n \n # Send the user creation request to OpenAI\n response = api.users.create(user_entity)\n print(f\"User created: {response}\")\n\ndef create_transaction():\n # Create a new transaction entity\n transaction_entity = {\n 'id': 'tran-123',\n 'user_id': 'user-123',\n 'transaction_type': 'query'\n }\n \n # Send the transaction creation request to OpenAI\n response = api.transactions.create(transaction_entity)\n print(f\"Transaction created: {response}\")\n\ndef send_telemetry_data():\n # Create a new telemetry event entity\n telemetry_event_entity = {\n 'id': 'telem-123',\n 'transaction_id': 'tran-123',\n 'data': '{ \"event\": \"test\", \"user_id\": 1 }'\n }\n \n # Send the telemetry data to OpenAI\n response = api.telemetry.create(telemetry_event_entity)\n print(f\"Telemetry event sent: {response}\")\n\n# Test the telemetry creation\ncreate_user()\ncreate_transaction()\nsend_telemetry_data()\n```\n\nMake sure you replace `OPENAI_API_KEY` with your actual API key. Also, ensure that you have the OpenAI API client library installed by running `pip install openai`.\n\nOnce you've created the test code, run it and observe the behavior of the telemetry creation process.\n\nPlease let me know if you need further modifications or assistance!",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 460,
"prompt_tokens": 30,
"total_tokens": 490,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,59 @@
{
"test_id": "tests/integration/telemetry/test_completions.py::test_telemetry_format_completeness[txt=ollama/llama3.2:3b-instruct-fp16]",
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Test trace openai with temperature 0.7"
}
],
"max_tokens": 100,
"stream": false,
"temperature": 0.7
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-1fcfd86d8111",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"message": {
"content": "import torch\nfrom transformers import AutoModelForCausalLM, AutoTokenizer\n\n# Load the pre-trained model and tokenizer\nmodel_name = \"CompVis/transformers-base-uncased\"\nmodel = AutoModelForCausalLM.from_pretrained(model_name)\ntokenizer = AutoTokenizer.from_pretrained(model_name)\n\n# Set the temperature to 0.7\ntemperature = 0.7\n\n# Define a function to generate text\ndef generate_text(prompt, max_length=100):\n input",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 100,
"prompt_tokens": 35,
"total_tokens": 135,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,59 @@
{
"test_id": "tests/integration/telemetry/test_completions.py::test_telemetry_format_completeness[txt=llama3.2:3b-instruct-fp16]",
"request": {
"method": "POST",
"url": "http://localhost:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Test trace openai with temperature 0.7"
}
],
"max_tokens": 100,
"stream": false,
"temperature": 0.7
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "rec-dba5042d6691",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"message": {
"content": "To test the \"trace\" functionality of OpenAI's GPT-4 model at a temperature of 0.7, you can follow these steps:\n\n1. First, make sure you have an account with OpenAI and have been granted access to their API.\n\n2. You will need to install the `transformers` library, which is the official library for working with Transformers models like GPT-4:\n\n ```bash\npip install transformers\n```\n\n3. Next, import the necessary",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 0,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 100,
"prompt_tokens": 35,
"total_tokens": 135,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,112 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""Telemetry tests verifying @trace_protocol decorator format using in-memory exporter."""
import json
import os
import pytest
pytestmark = pytest.mark.skipif(
os.environ.get("LLAMA_STACK_TEST_STACK_CONFIG_TYPE") == "server",
reason="In-memory telemetry tests only work in library_client mode (server mode runs in separate process)",
)
def test_streaming_chunk_count(mock_otlp_collector, llama_stack_client, text_model_id):
"""Verify streaming adds chunk_count and __type__=async_generator."""
stream = llama_stack_client.chat.completions.create(
model=text_model_id,
messages=[{"role": "user", "content": "Test trace openai 1"}],
stream=True,
)
chunks = list(stream)
assert len(chunks) > 0
spans = mock_otlp_collector.get_spans()
assert len(spans) > 0
chunk_count = None
for span in spans:
if span.attributes.get("__type__") == "async_generator":
chunk_count = span.attributes.get("chunk_count")
if chunk_count:
chunk_count = int(chunk_count)
break
assert chunk_count is not None
assert chunk_count == len(chunks)
def test_telemetry_format_completeness(mock_otlp_collector, llama_stack_client, text_model_id):
"""Comprehensive validation of telemetry data format including spans and metrics."""
response = llama_stack_client.chat.completions.create(
model=text_model_id,
messages=[{"role": "user", "content": "Test trace openai with temperature 0.7"}],
temperature=0.7,
max_tokens=100,
stream=False,
)
# Handle both dict and Pydantic model for usage
# This occurs due to the replay system returning a dict for usage, but the client returning a Pydantic model
# TODO: Fix this by making the replay system return a Pydantic model for usage
usage = response.usage if isinstance(response.usage, dict) else response.usage.model_dump()
assert usage.get("prompt_tokens") and usage["prompt_tokens"] > 0
assert usage.get("completion_tokens") and usage["completion_tokens"] > 0
assert usage.get("total_tokens") and usage["total_tokens"] > 0
# Verify spans
spans = mock_otlp_collector.get_spans()
assert len(spans) == 5
# we only need this captured one time
logged_model_id = None
for span in spans:
attrs = span.attributes
assert attrs is not None
# Root span is created manually by tracing middleware, not by @trace_protocol decorator
is_root_span = attrs.get("__root__") is True
if is_root_span:
# Root spans have different attributes
assert attrs.get("__location__") in ["library_client", "server"]
else:
# Non-root spans are created by @trace_protocol decorator
assert attrs.get("__autotraced__")
assert attrs.get("__class__") and attrs.get("__method__")
assert attrs.get("__type__") in ["async", "sync", "async_generator"]
args = json.loads(attrs["__args__"])
if "model_id" in args:
logged_model_id = args["model_id"]
assert logged_model_id is not None
assert logged_model_id == text_model_id
# TODO: re-enable this once metrics get fixed
"""
# Verify token usage metrics in response
metrics = mock_otlp_collector.get_metrics()
assert metrics
for metric in metrics:
assert metric.name in ["completion_tokens", "total_tokens", "prompt_tokens"]
assert metric.unit == "tokens"
assert metric.data.data_points and len(metric.data.data_points) == 1
match metric.name:
case "completion_tokens":
assert metric.data.data_points[0].value == usage["completion_tokens"]
case "total_tokens":
assert metric.data.data_points[0].value == usage["total_tokens"]
case "prompt_tokens":
assert metric.data.data_points[0].value == usage["prompt_tokens"
"""

View file

@ -0,0 +1,50 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
from io import StringIO
from unittest.mock import patch
from llama_stack.cli.stack._list_deps import (
run_stack_list_deps_command,
)
def test_stack_list_deps_basic():
args = argparse.Namespace(
config=None,
env_name="test-env",
providers="inference=remote::ollama",
format="deps-only",
)
with patch("sys.stdout", new_callable=StringIO) as mock_stdout:
run_stack_list_deps_command(args)
output = mock_stdout.getvalue()
# deps-only format should NOT include "uv pip install" or "Dependencies for"
assert "uv pip install" not in output
assert "Dependencies for" not in output
# Check that expected dependencies are present
assert "ollama" in output
assert "aiohttp" in output
assert "fastapi" in output
def test_stack_list_deps_with_distro_uv():
args = argparse.Namespace(
config="starter",
env_name=None,
providers=None,
format="uv",
)
with patch("sys.stdout", new_callable=StringIO) as mock_stdout:
run_stack_list_deps_command(args)
output = mock_stdout.getvalue()
assert "uv pip install" in output