remove legacy tracing

Emilio Garcia 2025-10-07 14:32:32 -04:00
parent 5f827fd4ed
commit 0c843ec87f
16 changed files with 15 additions and 211 deletions

View file

@@ -20,7 +20,6 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseOutputMessageWebSearchToolCall,
 )
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod

 Metadata = dict[str, str]
@@ -163,7 +162,6 @@ class ConversationItemDeletedResource(BaseModel):

 @runtime_checkable
-@trace_protocol
 class Conversations(Protocol):
     """Protocol for conversation management operations."""

View file

@@ -12,7 +12,6 @@ from pydantic import BaseModel, Field

 from llama_stack.apis.common.responses import Order
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
@@ -102,7 +101,6 @@ class OpenAIFileDeleteResponse(BaseModel):

 @runtime_checkable
-@trace_protocol
 class Files(Protocol):
     """Files

View file

@@ -29,7 +29,6 @@ from llama_stack.models.llama.datatypes import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod

 register_schema(ToolCall)
@@ -996,7 +995,6 @@ class ListOpenAIChatCompletionResponse(BaseModel):

 @runtime_checkable
-@trace_protocol
 class InferenceProvider(Protocol):
     """
     This protocol defines the interface that should be implemented by all inference providers.

View file

@@ -11,7 +11,6 @@ from pydantic import BaseModel, ConfigDict, Field, field_validator

 from llama_stack.apis.resource import Resource, ResourceType
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
@@ -101,7 +100,6 @@ class OpenAIListModelsResponse(BaseModel):

 @runtime_checkable
-@trace_protocol
 class Models(Protocol):
     @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
     async def list_models(self) -> ListModelsResponse:

View file

@@ -11,7 +11,7 @@ from typing import Protocol, runtime_checkable

 from pydantic import BaseModel, Field, field_validator, model_validator

 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
@@ -92,7 +92,6 @@ class ListPromptsResponse(BaseModel):

 @runtime_checkable
-@trace_protocol
 class Prompts(Protocol):
     """Prompts

View file

@@ -12,7 +12,7 @@ from pydantic import BaseModel, Field

 from llama_stack.apis.inference import Message
 from llama_stack.apis.shields import Shield
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
@@ -94,7 +94,6 @@ class ShieldStore(Protocol):

 @runtime_checkable
-@trace_protocol
 class Safety(Protocol):
     """Safety

View file

@@ -10,7 +10,7 @@ from pydantic import BaseModel

 from llama_stack.apis.resource import Resource, ResourceType
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
@@ -48,7 +48,6 @@ class ListShieldsResponse(BaseModel):

 @runtime_checkable
-@trace_protocol
 class Shields(Protocol):
     @webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1)
     async def list_shields(self) -> ListShieldsResponse:

View file

@@ -12,7 +12,7 @@ from typing_extensions import runtime_checkable

 from llama_stack.apis.common.content_types import URL, InterleavedContent
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@@ -184,7 +184,6 @@ class RAGQueryConfig(BaseModel):

 @runtime_checkable
-@trace_protocol
 class RAGToolRuntime(Protocol):
     @webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1)
     async def insert(

View file

@@ -13,7 +13,7 @@ from typing_extensions import runtime_checkable

 from llama_stack.apis.common.content_types import URL, InterleavedContent
 from llama_stack.apis.resource import Resource, ResourceType
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod

 from .rag_tool import RAGToolRuntime
@@ -109,7 +109,6 @@ class ListToolDefsResponse(BaseModel):

 @runtime_checkable
-@trace_protocol
 class ToolGroups(Protocol):
     @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1)
     async def register_tool_group(
@@ -191,7 +190,6 @@ class SpecialToolGroup(Enum):

 @runtime_checkable
-@trace_protocol
 class ToolRuntime(Protocol):
     tool_store: ToolStore | None = None

View file

@@ -10,7 +10,7 @@ from pydantic import BaseModel

 from llama_stack.apis.resource import Resource, ResourceType
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod
@@ -64,7 +64,6 @@ class ListVectorDBsResponse(BaseModel):

 @runtime_checkable
-@trace_protocol
 class VectorDBs(Protocol):
     @webmethod(route="/vector-dbs", method="GET", level=LLAMA_STACK_API_V1)
     async def list_vector_dbs(self) -> ListVectorDBsResponse:

View file

@@ -16,7 +16,7 @@ from pydantic import BaseModel, Field

 from llama_stack.apis.inference import InterleavedContent
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
 from llama_stack.schema_utils import json_schema_type, webmethod
 from llama_stack.strong_typing.schema import register_schema
@@ -471,7 +471,6 @@ class VectorDBStore(Protocol):

 @runtime_checkable
-@trace_protocol
 class VectorIO(Protocol):
     vector_db_store: VectorDBStore | None = None

View file

@@ -48,12 +48,7 @@ from llama_stack.core.utils.config import redact_sensitive_fields
 from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.core.utils.exec import in_notebook
 from llama_stack.log import get_logger
-from llama_stack.providers.utils.telemetry.tracing import (
-    CURRENT_TRACE_CONTEXT,
-    end_trace,
-    setup_logger,
-    start_trace,
-)

 logger = get_logger(name=__name__, category="core")
@@ -206,8 +201,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         super().__init__()
         # when using the library client, we should not log to console since many
         # of our logs are intended for server-side usage
-        current_sinks = os.environ.get("TELEMETRY_SINKS", "sqlite").split(",")
-        os.environ["TELEMETRY_SINKS"] = ",".join(sink for sink in current_sinks if sink != "console")

         if in_notebook():
             import nest_asyncio
@@ -293,8 +286,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
             raise _e

         assert self.impls is not None
-        if Api.telemetry in self.impls:
-            setup_logger(self.impls[Api.telemetry])

         if not os.environ.get("PYTEST_CURRENT_TEST"):
             console = Console()
@@ -384,13 +375,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
             body, field_names = self._handle_file_uploads(options, body)
             body = self._convert_body(path, options.method, body, exclude_params=set(field_names))

-        trace_path = webmethod.descriptive_name or route_path
-        await start_trace(trace_path, {"__location__": "library_client"})
-        try:
-            result = await matched_func(**body)
-        finally:
-            await end_trace()
-
+        result = await matched_func(**body)

         # Handle FastAPI Response objects (e.g., from file content retrieval)
         if isinstance(result, FastAPIResponse):
@@ -448,19 +433,13 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         body = self._convert_body(path, options.method, body)

-        trace_path = webmethod.descriptive_name or route_path
-        await start_trace(trace_path, {"__location__": "library_client"})
-
         async def gen():
-            try:
-                async for chunk in await func(**body):
-                    data = json.dumps(convert_pydantic_to_json_value(chunk))
-                    sse_event = f"data: {data}\n\n"
-                    yield sse_event.encode("utf-8")
-            finally:
-                await end_trace()
+            async for chunk in await func(**body):
+                data = json.dumps(convert_pydantic_to_json_value(chunk))
+                sse_event = f"data: {data}\n\n"
+                yield sse_event.encode("utf-8")

-        wrapped_gen = preserve_contexts_async_generator(gen(), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR])
+        wrapped_gen = preserve_contexts_async_generator(gen(), [PROVIDER_DATA_VAR])

         mock_response = httpx.Response(
             status_code=httpx.codes.OK,
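The one piece of this plumbing that survives is preserve_contexts_async_generator, now carrying only PROVIDER_DATA_VAR. A minimal sketch of the idea behind such a wrapper (the helper name, signature, and the ContextVar here are illustrative assumptions, not llama-stack's actual implementation): values set in contextvars before a streaming call can be lost by the time the event loop resumes the generator, so the wrapper captures them up front and re-installs them before each step.

import asyncio
import contextvars

# Illustrative stand-in for llama-stack's provider-data context variable.
PROVIDER_DATA_VAR: contextvars.ContextVar = contextvars.ContextVar("provider_data", default=None)

async def preserve_contexts(gen, ctx_vars):
    # Capture the values visible at wrap time...
    captured = [(var, var.get(None)) for var in ctx_vars]
    while True:
        # ...and re-install them before resuming the wrapped generator.
        for var, value in captured:
            var.set(value)
        try:
            yield await gen.__anext__()
        except StopAsyncIteration:
            return

async def main():
    PROVIDER_DATA_VAR.set("tenant-a")

    async def chunks():
        for i in range(3):
            yield i

    async for item in preserve_contexts(chunks(), [PROVIDER_DATA_VAR]):
        print(item, PROVIDER_DATA_VAR.get())

asyncio.run(main())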

View file

@@ -58,14 +58,6 @@ from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_dis
 from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api
-from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig
-from llama_stack.providers.inline.telemetry.meta_reference.telemetry import (
-    TelemetryAdapter,
-)
-from llama_stack.providers.utils.telemetry.tracing import (
-    CURRENT_TRACE_CONTEXT,
-    setup_logger,
-)

 from .auth import AuthenticationMiddleware
 from .quota import QuotaMiddleware
@@ -237,9 +229,7 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
         try:
             if is_streaming:
-                gen = preserve_contexts_async_generator(
-                    sse_generator(func(**kwargs)), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR]
-                )
+                gen = preserve_contexts_async_generator(sse_generator(func(**kwargs)), [PROVIDER_DATA_VAR])
                 return StreamingResponse(gen, media_type="text/event-stream")
             else:
                 value = func(**kwargs)
@@ -408,11 +398,6 @@ def create_app() -> StackApp:
     if cors_config:
         app.add_middleware(CORSMiddleware, **cors_config.model_dump())

-    if Api.telemetry in impls:
-        setup_logger(impls[Api.telemetry])
-    else:
-        setup_logger(TelemetryAdapter(TelemetryConfig(), {}))
-
     # Load external APIs if configured
     external_apis = load_external_apis(config)
     all_routes = get_all_api_routes(external_apis)
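For context on the streaming branch above: the route hands a context-preserving SSE generator to Starlette's StreamingResponse with the text/event-stream media type. A reduced, standalone sketch of that shape (the app, route, and payloads are illustrative; sse_generator itself is llama-stack code not reproduced here):

import asyncio
import json

from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()

async def event_stream():
    for i in range(3):
        # Same "data: <json>\n\n" framing the library client uses above.
        yield f"data: {json.dumps({'chunk': i})}\n\n".encode("utf-8")
        await asyncio.sleep(0.1)

@app.get("/stream")
async def stream():
    return StreamingResponse(event_stream(), media_type="text/event-stream")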

View file

@@ -1,142 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import asyncio
-import inspect
-import json
-from collections.abc import AsyncGenerator, Callable
-from functools import wraps
-from typing import Any
-
-from pydantic import BaseModel
-
-from llama_stack.models.llama.datatypes import Primitive
-
-
-def serialize_value(value: Any) -> Primitive:
-    return str(_prepare_for_json(value))
-
-
-def _prepare_for_json(value: Any) -> str:
-    """Serialize a single value into JSON-compatible format."""
-    if value is None:
-        return ""
-    elif isinstance(value, str | int | float | bool):
-        return value
-    elif hasattr(value, "_name_"):
-        return value._name_
-    elif isinstance(value, BaseModel):
-        return json.loads(value.model_dump_json())
-    elif isinstance(value, list | tuple | set):
-        return [_prepare_for_json(item) for item in value]
-    elif isinstance(value, dict):
-        return {str(k): _prepare_for_json(v) for k, v in value.items()}
-    else:
-        try:
-            json.dumps(value)
-            return value
-        except Exception:
-            return str(value)
-
-
-def trace_protocol[T](cls: type[T]) -> type[T]:
-    """
-    A class decorator that automatically traces all methods in a protocol/base class
-    and its inheriting classes.
-    """
-
-    def trace_method(method: Callable) -> Callable:
-        is_async = asyncio.iscoroutinefunction(method)
-        is_async_gen = inspect.isasyncgenfunction(method)
-
-        def create_span_context(self: Any, *args: Any, **kwargs: Any) -> tuple:
-            class_name = self.__class__.__name__
-            method_name = method.__name__
-            span_type = "async_generator" if is_async_gen else "async" if is_async else "sync"
-
-            sig = inspect.signature(method)
-            param_names = list(sig.parameters.keys())[1:]  # Skip 'self'
-            combined_args = {}
-            for i, arg in enumerate(args):
-                param_name = param_names[i] if i < len(param_names) else f"position_{i + 1}"
-                combined_args[param_name] = serialize_value(arg)
-            for k, v in kwargs.items():
-                combined_args[str(k)] = serialize_value(v)
-
-            span_attributes = {
-                "__autotraced__": True,
-                "__class__": class_name,
-                "__method__": method_name,
-                "__type__": span_type,
-                "__args__": str(combined_args),
-            }
-
-            return class_name, method_name, span_attributes
-
-        @wraps(method)
-        async def async_gen_wrapper(self: Any, *args: Any, **kwargs: Any) -> AsyncGenerator:
-            from llama_stack.providers.utils.telemetry import tracing
-
-            class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
-
-            with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
-                try:
-                    count = 0
-                    async for item in method(self, *args, **kwargs):
-                        yield item
-                        count += 1
-                finally:
-                    span.set_attribute("chunk_count", count)
-
-        @wraps(method)
-        async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
-            from llama_stack.providers.utils.telemetry import tracing
-
-            class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
-
-            with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
-                try:
-                    result = await method(self, *args, **kwargs)
-                    span.set_attribute("output", serialize_value(result))
-                    return result
-                except Exception as e:
-                    span.set_attribute("error", str(e))
-                    raise
-
-        @wraps(method)
-        def sync_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
-            from llama_stack.providers.utils.telemetry import tracing
-
-            class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
-
-            with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
-                try:
-                    result = method(self, *args, **kwargs)
-                    span.set_attribute("output", serialize_value(result))
-                    return result
-                except Exception as e:
-                    span.set_attribute("error", str(e))
-                    raise
-
-        if is_async_gen:
-            return async_gen_wrapper
-        elif is_async:
-            return async_wrapper
-        else:
-            return sync_wrapper
-
-    original_init_subclass = getattr(cls, "__init_subclass__", None)
-
-    def __init_subclass__(cls_child, **kwargs):  # noqa: N807
-        if original_init_subclass:
-            original_init_subclass(**kwargs)
-
-        for name, method in vars(cls_child).items():
-            if inspect.isfunction(method) and not name.startswith("_"):
-                setattr(cls_child, name, trace_method(method))  # noqa: B010
-
-    cls.__init_subclass__ = classmethod(__init_subclass__)
-
-    return cls
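Worth noting before this file disappears: trace_protocol never wrapped the protocol's own methods (those are stubs); it installed an __init_subclass__ hook so every concrete subclass had its public methods wrapped at class-creation time. A compact, runnable sketch of that pattern (traced, Base, and Impl are made-up names, and the print stands in for span creation):

import functools
import inspect

def traced(cls):
    # Wrap a single method so each call is logged before dispatch.
    def wrap(method):
        @functools.wraps(method)
        def wrapper(self, *args, **kwargs):
            print(f"enter {type(self).__name__}.{method.__name__}")
            return method(self, *args, **kwargs)
        return wrapper

    original = getattr(cls, "__init_subclass__", None)

    # Runs whenever a subclass of `cls` is defined; `child` is the new class.
    def __init_subclass__(child, **kwargs):
        if original:
            original(**kwargs)
        for name, member in vars(child).items():
            if inspect.isfunction(member) and not name.startswith("_"):
                setattr(child, name, wrap(member))

    cls.__init_subclass__ = classmethod(__init_subclass__)
    return cls

@traced
class Base:
    pass

class Impl(Base):
    def greet(self, who: str) -> str:
        return f"hello {who}"

print(Impl().greet("world"))  # logs "enter Impl.greet", then "hello world"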

View file

@@ -29,7 +29,6 @@ from llama_stack.apis.telemetry import (
     UnstructuredLogEvent,
 )
 from llama_stack.log import get_logger
-from llama_stack.providers.utils.telemetry.trace_protocol import serialize_value

 logger = get_logger(__name__, category="core")

View file

@@ -122,7 +122,6 @@ def _prepare_for_json(value: Any) -> str:
         return str(value)


-@patch("llama_stack.providers.utils.telemetry.trace_protocol._prepare_for_json", new=_prepare_for_json)
 async def test_qdrant_register_and_unregister_vector_db(
     qdrant_adapter: QdrantVectorIOAdapter,
     mock_vector_db,
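For context on the removed @patch line: unittest.mock's patch(target, new=obj) installs the given object itself instead of an autogenerated MagicMock, which is how the test substituted its local _prepare_for_json copy. A stdlib-only illustration of that mechanic (Serializer and _fake_json are made-up names):

from unittest.mock import patch

def _fake_json(value):
    return str(value)

class Serializer:
    @staticmethod
    def prepare(value):
        return value

# new=... swaps in the given object for the duration of the block, mirroring
# what @patch(..., new=_prepare_for_json) did for the qdrant test above.
with patch.object(Serializer, "prepare", new=_fake_json):
    assert Serializer.prepare(42) == "42"

assert Serializer.prepare(42) == 42  # original restored on exit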