diff --git a/llama_stack/apis/conversations/conversations.py b/llama_stack/apis/conversations/conversations.py
index 58ae9c35a..4add01879 100644
--- a/llama_stack/apis/conversations/conversations.py
+++ b/llama_stack/apis/conversations/conversations.py
@@ -20,7 +20,6 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseOutputMessageWebSearchToolCall,
 )
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
 Metadata = dict[str, str]
@@ -163,7 +162,6 @@ class ConversationItemDeletedResource(BaseModel):
 
 
 @runtime_checkable
-@trace_protocol
 class Conversations(Protocol):
     """Protocol for conversation management operations."""
diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py
index f1d3764db..877e24819 100644
--- a/llama_stack/apis/files/files.py
+++ b/llama_stack/apis/files/files.py
@@ -12,7 +12,6 @@ from pydantic import BaseModel, Field
 
 from llama_stack.apis.common.responses import Order
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
 
 
@@ -102,7 +101,6 @@ class OpenAIFileDeleteResponse(BaseModel):
 
 
 @runtime_checkable
-@trace_protocol
 class Files(Protocol):
     """Files
diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py
index 62a988ea6..607eb1989 100644
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@@ -29,7 +29,6 @@ from llama_stack.models.llama.datatypes import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
 register_schema(ToolCall)
@@ -996,7 +995,6 @@ class ListOpenAIChatCompletionResponse(BaseModel):
 
 
 @runtime_checkable
-@trace_protocol
 class InferenceProvider(Protocol):
     """
     This protocol defines the interface that should be implemented by all inference providers.
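For context on the repeated one-line removals above and below: `@trace_protocol` (its full source is in the deleted `trace_protocol.py` near the end of this diff) hooks `__init_subclass__` so that every concrete implementation of a decorated protocol gets its public methods wrapped in telemetry spans. Here is a minimal self-contained sketch of that mechanism, with `print()` as a stand-in for the real `tracing.span` context manager and covering only the plain-async case (the deleted code also handles sync and async-generator methods):

```python
import asyncio
import inspect
from functools import wraps


def trace_protocol(cls):
    def trace_method(method):
        @wraps(method)
        async def wrapper(self, *args, **kwargs):
            # Stand-in for the real tracing.span(...) context manager.
            print(f"span start: {self.__class__.__name__}.{method.__name__}")
            try:
                return await method(self, *args, **kwargs)
            finally:
                print("span end")

        return wrapper

    def __init_subclass__(cls_child, **kwargs):
        # Fires whenever a concrete implementation subclasses the decorated
        # base: every public method is rewrapped to run inside a span.
        for name, fn in vars(cls_child).items():
            if inspect.isfunction(fn) and not name.startswith("_"):
                setattr(cls_child, name, trace_method(fn))

    cls.__init_subclass__ = classmethod(__init_subclass__)
    return cls


@trace_protocol
class Inference:
    async def chat(self, prompt: str) -> str: ...


class EchoInference(Inference):
    async def chat(self, prompt: str) -> str:
        return prompt.upper()


# Prints the span markers around the call, then "HELLO".
print(asyncio.run(EchoInference().chat("hello")))
```

Because the tracing is injected at subclass-creation time, no call sites change when the decorator goes away; that is why each API file in this diff is a pure one-line deletion (or a deletion plus a blank line to keep spacing).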
diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py
index 10949cb95..b38044087 100644
--- a/llama_stack/apis/models/models.py
+++ b/llama_stack/apis/models/models.py
@@ -11,7 +11,6 @@ from pydantic import BaseModel, ConfigDict, Field, field_validator
 
 from llama_stack.apis.resource import Resource, ResourceType
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
 
 
@@ -101,7 +100,6 @@ class OpenAIListModelsResponse(BaseModel):
 
 
 @runtime_checkable
-@trace_protocol
 class Models(Protocol):
     @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
     async def list_models(self) -> ListModelsResponse:
diff --git a/llama_stack/apis/prompts/prompts.py b/llama_stack/apis/prompts/prompts.py
index b39c363c7..313e5d65e 100644
--- a/llama_stack/apis/prompts/prompts.py
+++ b/llama_stack/apis/prompts/prompts.py
@@ -11,7 +11,7 @@ from typing import Protocol, runtime_checkable
 from pydantic import BaseModel, Field, field_validator, model_validator
 
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+
 from llama_stack.schema_utils import json_schema_type, webmethod
 
 
@@ -92,7 +92,6 @@ class ListPromptsResponse(BaseModel):
 
 
 @runtime_checkable
-@trace_protocol
 class Prompts(Protocol):
     """Prompts
diff --git a/llama_stack/apis/safety/safety.py b/llama_stack/apis/safety/safety.py
index 2ae74b0a7..6a906ac69 100644
--- a/llama_stack/apis/safety/safety.py
+++ b/llama_stack/apis/safety/safety.py
@@ -12,7 +12,7 @@ from pydantic import BaseModel, Field
 from llama_stack.apis.inference import Message
 from llama_stack.apis.shields import Shield
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+
 from llama_stack.schema_utils import json_schema_type, webmethod
 
 
@@ -94,7 +94,6 @@ class ShieldStore(Protocol):
 
 
 @runtime_checkable
-@trace_protocol
 class Safety(Protocol):
     """Safety
diff --git a/llama_stack/apis/shields/shields.py b/llama_stack/apis/shields/shields.py
index 5d967cf02..14cd85e65 100644
--- a/llama_stack/apis/shields/shields.py
+++ b/llama_stack/apis/shields/shields.py
@@ -10,7 +10,7 @@ from pydantic import BaseModel
 
 from llama_stack.apis.resource import Resource, ResourceType
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+
 from llama_stack.schema_utils import json_schema_type, webmethod
 
 
@@ -48,7 +48,6 @@ class ListShieldsResponse(BaseModel):
 
 
 @runtime_checkable
-@trace_protocol
 class Shields(Protocol):
     @webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1)
     async def list_shields(self) -> ListShieldsResponse:
diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py
index ed7847e23..0844cd4d8 100644
--- a/llama_stack/apis/tools/rag_tool.py
+++ b/llama_stack/apis/tools/rag_tool.py
@@ -12,7 +12,7 @@ from typing_extensions import runtime_checkable
 
 from llama_stack.apis.common.content_types import URL, InterleavedContent
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
 
 
@@ -184,7 +184,6 @@ class RAGQueryConfig(BaseModel):
 
 
 @runtime_checkable
-@trace_protocol
 class RAGToolRuntime(Protocol):
     @webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1)
     async def insert(
diff --git a/llama_stack/apis/tools/tools.py b/llama_stack/apis/tools/tools.py
index b6a1a2543..af8753dc4 100644
--- a/llama_stack/apis/tools/tools.py
+++ b/llama_stack/apis/tools/tools.py
@@ -13,7 +13,7 @@ from typing_extensions import runtime_checkable
 
 from llama_stack.apis.common.content_types import URL, InterleavedContent
 from llama_stack.apis.resource import Resource, ResourceType
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+
 from llama_stack.schema_utils import json_schema_type, webmethod
 
 from .rag_tool import RAGToolRuntime
@@ -109,7 +109,6 @@ class ListToolDefsResponse(BaseModel):
 
 
 @runtime_checkable
-@trace_protocol
 class ToolGroups(Protocol):
     @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1)
     async def register_tool_group(
@@ -191,7 +190,6 @@ class SpecialToolGroup(Enum):
 
 
 @runtime_checkable
-@trace_protocol
 class ToolRuntime(Protocol):
     tool_store: ToolStore | None = None
diff --git a/llama_stack/apis/vector_dbs/vector_dbs.py b/llama_stack/apis/vector_dbs/vector_dbs.py
index 521d129c6..d93c684f0 100644
--- a/llama_stack/apis/vector_dbs/vector_dbs.py
+++ b/llama_stack/apis/vector_dbs/vector_dbs.py
@@ -10,7 +10,7 @@ from pydantic import BaseModel
 
 from llama_stack.apis.resource import Resource, ResourceType
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+
 from llama_stack.schema_utils import json_schema_type, webmethod
 
 
@@ -64,7 +64,6 @@ class ListVectorDBsResponse(BaseModel):
 
 
 @runtime_checkable
-@trace_protocol
 class VectorDBs(Protocol):
     @webmethod(route="/vector-dbs", method="GET", level=LLAMA_STACK_API_V1)
     async def list_vector_dbs(self) -> ListVectorDBsResponse:
diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py
index 238889099..0acf0920a 100644
--- a/llama_stack/apis/vector_io/vector_io.py
+++ b/llama_stack/apis/vector_io/vector_io.py
@@ -16,7 +16,7 @@ from pydantic import BaseModel, Field
 from llama_stack.apis.inference import InterleavedContent
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+
 from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
 from llama_stack.schema_utils import json_schema_type, webmethod
 from llama_stack.strong_typing.schema import register_schema
@@ -471,7 +471,6 @@ class VectorDBStore(Protocol):
 
 
 @runtime_checkable
-@trace_protocol
 class VectorIO(Protocol):
     vector_db_store: VectorDBStore | None = None
diff --git a/llama_stack/core/library_client.py b/llama_stack/core/library_client.py
index 0d9f9f134..14c10a5de 100644
--- a/llama_stack/core/library_client.py
+++ b/llama_stack/core/library_client.py
@@ -48,12 +48,7 @@ from llama_stack.core.utils.config import redact_sensitive_fields
 from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.core.utils.exec import in_notebook
 from llama_stack.log import get_logger
-from llama_stack.providers.utils.telemetry.tracing import (
-    CURRENT_TRACE_CONTEXT,
-    end_trace,
-    setup_logger,
-    start_trace,
-)
+
 
 logger = get_logger(name=__name__, category="core")
 
@@ -206,8 +201,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         super().__init__()
         # when using the library client, we should not log to console since many
         # of our logs are intended for server-side usage
-        current_sinks = os.environ.get("TELEMETRY_SINKS", "sqlite").split(",")
-        os.environ["TELEMETRY_SINKS"] = ",".join(sink for sink in current_sinks if sink != "console")
 
         if in_notebook():
             import nest_asyncio
@@ -293,8 +286,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
             raise _e
 
         assert self.impls is not None
-        if Api.telemetry in self.impls:
-            setup_logger(self.impls[Api.telemetry])
 
         if not os.environ.get("PYTEST_CURRENT_TEST"):
             console = Console()
@@ -384,13 +375,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
             body, field_names = self._handle_file_uploads(options, body)
 
         body = self._convert_body(path, options.method, body, exclude_params=set(field_names))
-
-        trace_path = webmethod.descriptive_name or route_path
-        await start_trace(trace_path, {"__location__": "library_client"})
-        try:
-            result = await matched_func(**body)
-        finally:
-            await end_trace()
+        result = await matched_func(**body)
 
         # Handle FastAPI Response objects (e.g., from file content retrieval)
         if isinstance(result, FastAPIResponse):
@@ -448,19 +433,13 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
 
         body = self._convert_body(path, options.method, body)
 
-        trace_path = webmethod.descriptive_name or route_path
-        await start_trace(trace_path, {"__location__": "library_client"})
-
         async def gen():
-            try:
-                async for chunk in await func(**body):
-                    data = json.dumps(convert_pydantic_to_json_value(chunk))
-                    sse_event = f"data: {data}\n\n"
-                    yield sse_event.encode("utf-8")
-            finally:
-                await end_trace()
+            async for chunk in await func(**body):
+                data = json.dumps(convert_pydantic_to_json_value(chunk))
+                sse_event = f"data: {data}\n\n"
+                yield sse_event.encode("utf-8")
 
-        wrapped_gen = preserve_contexts_async_generator(gen(), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR])
+        wrapped_gen = preserve_contexts_async_generator(gen(), [PROVIDER_DATA_VAR])
 
         mock_response = httpx.Response(
             status_code=httpx.codes.OK,
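In the streaming path above, only the SSE framing survives once the trace bookkeeping is gone. Here is a standalone sketch of that framing, with pydantic's `model_dump()` standing in for `convert_pydantic_to_json_value` (an assumption made to keep the example self-contained):

```python
import json

from pydantic import BaseModel


class Chunk(BaseModel):
    delta: str


def to_sse(chunk: Chunk) -> bytes:
    # Mirrors the surviving gen() body: JSON-encode the chunk, then frame it
    # as a server-sent event terminated by a blank line.
    data = json.dumps(chunk.model_dump())
    return f"data: {data}\n\n".encode("utf-8")


assert to_sse(Chunk(delta="hi")) == b'data: {"delta": "hi"}\n\n'
```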
diff --git a/llama_stack/core/server/server.py b/llama_stack/core/server/server.py
index 67994e97b..3b3e7644b 100644
--- a/llama_stack/core/server/server.py
+++ b/llama_stack/core/server/server.py
@@ -58,14 +58,6 @@ from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_dis
 from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api
-from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig
-from llama_stack.providers.inline.telemetry.meta_reference.telemetry import (
-    TelemetryAdapter,
-)
-from llama_stack.providers.utils.telemetry.tracing import (
-    CURRENT_TRACE_CONTEXT,
-    setup_logger,
-)
 
 from .auth import AuthenticationMiddleware
 from .quota import QuotaMiddleware
@@ -237,9 +229,7 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
         try:
             if is_streaming:
-                gen = preserve_contexts_async_generator(
-                    sse_generator(func(**kwargs)), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR]
-                )
+                gen = preserve_contexts_async_generator(sse_generator(func(**kwargs)), [PROVIDER_DATA_VAR])
                 return StreamingResponse(gen, media_type="text/event-stream")
             else:
                 value = func(**kwargs)
@@ -408,11 +398,6 @@ def create_app() -> StackApp:
     if cors_config:
         app.add_middleware(CORSMiddleware, **cors_config.model_dump())
 
-    if Api.telemetry in impls:
-        setup_logger(impls[Api.telemetry])
-    else:
-        setup_logger(TelemetryAdapter(TelemetryConfig(), {}))
-
     # Load external APIs if configured
     external_apis = load_external_apis(config)
     all_routes = get_all_api_routes(external_apis)
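Note that both the server and the library client still wrap their SSE generators in `preserve_contexts_async_generator`, now passing only `PROVIDER_DATA_VAR`. A simplified, hypothetical sketch of what such a helper does (the real one lives in `llama_stack.core.utils.context`; this is an illustration, not its actual implementation):

```python
import asyncio
from contextvars import ContextVar

PROVIDER_DATA_VAR: ContextVar[dict | None] = ContextVar("provider_data", default=None)


def preserve_contexts_async_generator(gen, context_vars):
    # Capture the values visible at wrap time (while the request handler is
    # still running)...
    captured = [(var, var.get()) for var in context_vars]

    async def wrapper():
        # ...and re-apply them in whatever context ends up driving the
        # generator, before any chunk is produced.
        for var, value in captured:
            var.set(value)
        async for item in gen:
            yield item

    return wrapper()


async def main():
    PROVIDER_DATA_VAR.set({"api_key": "..."})

    async def stream():
        for _ in range(2):
            yield PROVIDER_DATA_VAR.get()

    async for item in preserve_contexts_async_generator(stream(), [PROVIDER_DATA_VAR]):
        print(item)  # {'api_key': '...'} for each chunk


asyncio.run(main())
```

Dropping `CURRENT_TRACE_CONTEXT` from the list just means the trace context is no longer among the variables carried across generator resumptions; the provider-data variable still is.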
- """ - - def trace_method(method: Callable) -> Callable: - is_async = asyncio.iscoroutinefunction(method) - is_async_gen = inspect.isasyncgenfunction(method) - - def create_span_context(self: Any, *args: Any, **kwargs: Any) -> tuple: - class_name = self.__class__.__name__ - method_name = method.__name__ - span_type = "async_generator" if is_async_gen else "async" if is_async else "sync" - sig = inspect.signature(method) - param_names = list(sig.parameters.keys())[1:] # Skip 'self' - combined_args = {} - for i, arg in enumerate(args): - param_name = param_names[i] if i < len(param_names) else f"position_{i + 1}" - combined_args[param_name] = serialize_value(arg) - for k, v in kwargs.items(): - combined_args[str(k)] = serialize_value(v) - - span_attributes = { - "__autotraced__": True, - "__class__": class_name, - "__method__": method_name, - "__type__": span_type, - "__args__": str(combined_args), - } - - return class_name, method_name, span_attributes - - @wraps(method) - async def async_gen_wrapper(self: Any, *args: Any, **kwargs: Any) -> AsyncGenerator: - from llama_stack.providers.utils.telemetry import tracing - - class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs) - - with tracing.span(f"{class_name}.{method_name}", span_attributes) as span: - try: - count = 0 - async for item in method(self, *args, **kwargs): - yield item - count += 1 - finally: - span.set_attribute("chunk_count", count) - - @wraps(method) - async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any: - from llama_stack.providers.utils.telemetry import tracing - - class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs) - - with tracing.span(f"{class_name}.{method_name}", span_attributes) as span: - try: - result = await method(self, *args, **kwargs) - span.set_attribute("output", serialize_value(result)) - return result - except Exception as e: - span.set_attribute("error", str(e)) - raise - - @wraps(method) - def sync_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any: - from llama_stack.providers.utils.telemetry import tracing - - class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs) - - with tracing.span(f"{class_name}.{method_name}", span_attributes) as span: - try: - result = method(self, *args, **kwargs) - span.set_attribute("output", serialize_value(result)) - return result - except Exception as e: - span.set_attribute("error", str(e)) - raise - - if is_async_gen: - return async_gen_wrapper - elif is_async: - return async_wrapper - else: - return sync_wrapper - - original_init_subclass = getattr(cls, "__init_subclass__", None) - - def __init_subclass__(cls_child, **kwargs): # noqa: N807 - if original_init_subclass: - original_init_subclass(**kwargs) - - for name, method in vars(cls_child).items(): - if inspect.isfunction(method) and not name.startswith("_"): - setattr(cls_child, name, trace_method(method)) # noqa: B010 - - cls.__init_subclass__ = classmethod(__init_subclass__) - - return cls diff --git a/llama_stack/providers/utils/telemetry/tracing.py b/llama_stack/providers/utils/telemetry/tracing.py index 62cceb13e..738d2bf03 100644 --- a/llama_stack/providers/utils/telemetry/tracing.py +++ b/llama_stack/providers/utils/telemetry/tracing.py @@ -29,7 +29,6 @@ from llama_stack.apis.telemetry import ( UnstructuredLogEvent, ) from llama_stack.log import get_logger -from llama_stack.providers.utils.telemetry.trace_protocol import serialize_value logger = get_logger(__name__, category="core") diff --git 
diff --git a/tests/unit/providers/vector_io/test_qdrant.py b/tests/unit/providers/vector_io/test_qdrant.py
index aab5b6f45..49a1aa0a1 100644
--- a/tests/unit/providers/vector_io/test_qdrant.py
+++ b/tests/unit/providers/vector_io/test_qdrant.py
@@ -122,7 +122,6 @@ def _prepare_for_json(value: Any) -> str:
     return str(value)
 
 
-@patch("llama_stack.providers.utils.telemetry.trace_protocol._prepare_for_json", new=_prepare_for_json)
 async def test_qdrant_register_and_unregister_vector_db(
     qdrant_adapter: QdrantVectorIOAdapter,
     mock_vector_db,
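The dropped `@patch` decorator targeted `_prepare_for_json` inside the now-deleted `trace_protocol` module, so the patch target no longer resolves; the test keeps its local `_prepare_for_json` and simply stops patching. For reference, the removed `new=` patching pattern in isolation, with a hypothetical stand-in target:

```python
import json
from unittest.mock import patch


def render(value) -> str:
    return json.dumps(value)


def fake_dumps(value) -> str:
    return "<patched>"


# new= swaps the module attribute for the duration of the call, which is how
# the test previously substituted its own _prepare_for_json.
@patch("json.dumps", new=fake_dumps)
def test_render_uses_patched_dumps():
    assert render({"x": 1}) == "<patched>"


test_render_uses_patched_dumps()
```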