Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-12 20:12:33 +00:00)

commit 0c843ec87f (parent 5f827fd4ed)
remove legacy tracing

16 changed files with 15 additions and 211 deletions
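Aside from the deleted trace_protocol.py (142 lines), most hunks follow one mechanical pattern across the API protocol definitions (Conversations, Files, InferenceProvider, Models, Prompts, Safety, Shields, ToolGroups, ToolRuntime, RAGToolRuntime, VectorDBs, VectorIO): drop the trace_protocol import and the @trace_protocol class decorator, and leave the @runtime_checkable protocol itself untouched. An abridged before/after sketch of that pattern, using the Conversations protocol from the first hunks below:

# After this commit, each API module keeps only the runtime-checkable protocol.
from typing import Protocol, runtime_checkable


@runtime_checkable
class Conversations(Protocol):
    """Protocol for conversation management operations."""


# Before, the same class additionally carried the now-deleted decorator:
#
#   from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
#
#   @runtime_checkable
#   @trace_protocol
#   class Conversations(Protocol):
#       """Protocol for conversation management operations."""

The remaining hunks remove the matching plumbing from the library client, the server, and the tests, and delete trace_protocol.py itself.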
@@ -20,7 +20,6 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseOutputMessageWebSearchToolCall,
 )
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod

 Metadata = dict[str, str]
@@ -163,7 +162,6 @@ class ConversationItemDeletedResource(BaseModel):


 @runtime_checkable
-@trace_protocol
 class Conversations(Protocol):
     """Protocol for conversation management operations."""

@@ -12,7 +12,6 @@ from pydantic import BaseModel, Field

 from llama_stack.apis.common.responses import Order
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod


@@ -102,7 +101,6 @@ class OpenAIFileDeleteResponse(BaseModel):


 @runtime_checkable
-@trace_protocol
 class Files(Protocol):
     """Files

@@ -29,7 +29,6 @@ from llama_stack.models.llama.datatypes import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod

 register_schema(ToolCall)
@@ -996,7 +995,6 @@ class ListOpenAIChatCompletionResponse(BaseModel):


 @runtime_checkable
-@trace_protocol
 class InferenceProvider(Protocol):
     """
     This protocol defines the interface that should be implemented by all inference providers.
@@ -11,7 +11,6 @@ from pydantic import BaseModel, ConfigDict, Field, field_validator

 from llama_stack.apis.resource import Resource, ResourceType
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod


@@ -101,7 +100,6 @@ class OpenAIListModelsResponse(BaseModel):


 @runtime_checkable
-@trace_protocol
 class Models(Protocol):
     @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
     async def list_models(self) -> ListModelsResponse:
@@ -11,7 +11,7 @@ from typing import Protocol, runtime_checkable
 from pydantic import BaseModel, Field, field_validator, model_validator

 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod


@@ -92,7 +92,6 @@ class ListPromptsResponse(BaseModel):


 @runtime_checkable
-@trace_protocol
 class Prompts(Protocol):
     """Prompts

@@ -12,7 +12,7 @@ from pydantic import BaseModel, Field
 from llama_stack.apis.inference import Message
 from llama_stack.apis.shields import Shield
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod


@@ -94,7 +94,6 @@ class ShieldStore(Protocol):


 @runtime_checkable
-@trace_protocol
 class Safety(Protocol):
     """Safety

@@ -10,7 +10,7 @@ from pydantic import BaseModel

 from llama_stack.apis.resource import Resource, ResourceType
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod


@@ -48,7 +48,6 @@ class ListShieldsResponse(BaseModel):


 @runtime_checkable
-@trace_protocol
 class Shields(Protocol):
     @webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1)
     async def list_shields(self) -> ListShieldsResponse:
@@ -12,7 +12,7 @@ from typing_extensions import runtime_checkable

 from llama_stack.apis.common.content_types import URL, InterleavedContent
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod


@@ -184,7 +184,6 @@ class RAGQueryConfig(BaseModel):


 @runtime_checkable
-@trace_protocol
 class RAGToolRuntime(Protocol):
     @webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1)
     async def insert(
@@ -13,7 +13,7 @@ from typing_extensions import runtime_checkable
 from llama_stack.apis.common.content_types import URL, InterleavedContent
 from llama_stack.apis.resource import Resource, ResourceType
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod

 from .rag_tool import RAGToolRuntime
@@ -109,7 +109,6 @@ class ListToolDefsResponse(BaseModel):


 @runtime_checkable
-@trace_protocol
 class ToolGroups(Protocol):
     @webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1)
     async def register_tool_group(
@@ -191,7 +190,6 @@ class SpecialToolGroup(Enum):


 @runtime_checkable
-@trace_protocol
 class ToolRuntime(Protocol):
     tool_store: ToolStore | None = None

@@ -10,7 +10,7 @@ from pydantic import BaseModel

 from llama_stack.apis.resource import Resource, ResourceType
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod


@@ -64,7 +64,6 @@ class ListVectorDBsResponse(BaseModel):


 @runtime_checkable
-@trace_protocol
 class VectorDBs(Protocol):
     @webmethod(route="/vector-dbs", method="GET", level=LLAMA_STACK_API_V1)
     async def list_vector_dbs(self) -> ListVectorDBsResponse:
@@ -16,7 +16,7 @@ from pydantic import BaseModel, Field
 from llama_stack.apis.inference import InterleavedContent
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
 from llama_stack.schema_utils import json_schema_type, webmethod
 from llama_stack.strong_typing.schema import register_schema
@@ -471,7 +471,6 @@ class VectorDBStore(Protocol):


 @runtime_checkable
-@trace_protocol
 class VectorIO(Protocol):
     vector_db_store: VectorDBStore | None = None

@@ -48,12 +48,7 @@ from llama_stack.core.utils.config import redact_sensitive_fields
 from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.core.utils.exec import in_notebook
 from llama_stack.log import get_logger
-from llama_stack.providers.utils.telemetry.tracing import (
-    CURRENT_TRACE_CONTEXT,
-    end_trace,
-    setup_logger,
-    start_trace,
-)

 logger = get_logger(name=__name__, category="core")

@@ -206,8 +201,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         super().__init__()
         # when using the library client, we should not log to console since many
         # of our logs are intended for server-side usage
-        current_sinks = os.environ.get("TELEMETRY_SINKS", "sqlite").split(",")
-        os.environ["TELEMETRY_SINKS"] = ",".join(sink for sink in current_sinks if sink != "console")

         if in_notebook():
             import nest_asyncio
@@ -293,8 +286,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
             raise _e

         assert self.impls is not None
-        if Api.telemetry in self.impls:
-            setup_logger(self.impls[Api.telemetry])

         if not os.environ.get("PYTEST_CURRENT_TEST"):
             console = Console()
@@ -384,13 +375,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         body, field_names = self._handle_file_uploads(options, body)

         body = self._convert_body(path, options.method, body, exclude_params=set(field_names))
-
-        trace_path = webmethod.descriptive_name or route_path
-        await start_trace(trace_path, {"__location__": "library_client"})
-        try:
-            result = await matched_func(**body)
-        finally:
-            await end_trace()
+        result = await matched_func(**body)

         # Handle FastAPI Response objects (e.g., from file content retrieval)
         if isinstance(result, FastAPIResponse):
@@ -448,19 +433,13 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):

         body = self._convert_body(path, options.method, body)

-        trace_path = webmethod.descriptive_name or route_path
-        await start_trace(trace_path, {"__location__": "library_client"})
-
         async def gen():
-            try:
-                async for chunk in await func(**body):
-                    data = json.dumps(convert_pydantic_to_json_value(chunk))
-                    sse_event = f"data: {data}\n\n"
-                    yield sse_event.encode("utf-8")
-            finally:
-                await end_trace()
+            async for chunk in await func(**body):
+                data = json.dumps(convert_pydantic_to_json_value(chunk))
+                sse_event = f"data: {data}\n\n"
+                yield sse_event.encode("utf-8")

-        wrapped_gen = preserve_contexts_async_generator(gen(), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR])
+        wrapped_gen = preserve_contexts_async_generator(gen(), [PROVIDER_DATA_VAR])

         mock_response = httpx.Response(
             status_code=httpx.codes.OK,
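With the trace bookkeeping gone, the streaming path in the library client reduces to plain SSE framing: each chunk from the provider is converted to JSON and emitted as a data: event. A tiny standalone illustration of the framing the surviving gen() produces (the chunk payload here is invented for the example):

import json

chunk = {"event": {"delta": {"text": "Hello"}}}  # illustrative payload, not a real response chunk
data = json.dumps(chunk)
sse_event = f"data: {data}\n\n"
print(sse_event.encode("utf-8"))
# b'data: {"event": {"delta": {"text": "Hello"}}}\n\n'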
@@ -58,14 +58,6 @@ from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_dis
 from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api
-from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig
-from llama_stack.providers.inline.telemetry.meta_reference.telemetry import (
-    TelemetryAdapter,
-)
-from llama_stack.providers.utils.telemetry.tracing import (
-    CURRENT_TRACE_CONTEXT,
-    setup_logger,
-)

 from .auth import AuthenticationMiddleware
 from .quota import QuotaMiddleware
@@ -237,9 +229,7 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:

         try:
             if is_streaming:
-                gen = preserve_contexts_async_generator(
-                    sse_generator(func(**kwargs)), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR]
-                )
+                gen = preserve_contexts_async_generator(sse_generator(func(**kwargs)), [PROVIDER_DATA_VAR])
                 return StreamingResponse(gen, media_type="text/event-stream")
             else:
                 value = func(**kwargs)
@@ -408,11 +398,6 @@ def create_app() -> StackApp:
     if cors_config:
         app.add_middleware(CORSMiddleware, **cors_config.model_dump())

-    if Api.telemetry in impls:
-        setup_logger(impls[Api.telemetry])
-    else:
-        setup_logger(TelemetryAdapter(TelemetryConfig(), {}))
-
     # Load external APIs if configured
     external_apis = load_external_apis(config)
     all_routes = get_all_api_routes(external_apis)
@@ -1,142 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import asyncio
-import inspect
-import json
-from collections.abc import AsyncGenerator, Callable
-from functools import wraps
-from typing import Any
-
-from pydantic import BaseModel
-
-from llama_stack.models.llama.datatypes import Primitive
-
-
-def serialize_value(value: Any) -> Primitive:
-    return str(_prepare_for_json(value))
-
-
-def _prepare_for_json(value: Any) -> str:
-    """Serialize a single value into JSON-compatible format."""
-    if value is None:
-        return ""
-    elif isinstance(value, str | int | float | bool):
-        return value
-    elif hasattr(value, "_name_"):
-        return value._name_
-    elif isinstance(value, BaseModel):
-        return json.loads(value.model_dump_json())
-    elif isinstance(value, list | tuple | set):
-        return [_prepare_for_json(item) for item in value]
-    elif isinstance(value, dict):
-        return {str(k): _prepare_for_json(v) for k, v in value.items()}
-    else:
-        try:
-            json.dumps(value)
-            return value
-        except Exception:
-            return str(value)
-
-
-def trace_protocol[T](cls: type[T]) -> type[T]:
-    """
-    A class decorator that automatically traces all methods in a protocol/base class
-    and its inheriting classes.
-    """
-
-    def trace_method(method: Callable) -> Callable:
-        is_async = asyncio.iscoroutinefunction(method)
-        is_async_gen = inspect.isasyncgenfunction(method)
-
-        def create_span_context(self: Any, *args: Any, **kwargs: Any) -> tuple:
-            class_name = self.__class__.__name__
-            method_name = method.__name__
-            span_type = "async_generator" if is_async_gen else "async" if is_async else "sync"
-            sig = inspect.signature(method)
-            param_names = list(sig.parameters.keys())[1:]  # Skip 'self'
-            combined_args = {}
-            for i, arg in enumerate(args):
-                param_name = param_names[i] if i < len(param_names) else f"position_{i + 1}"
-                combined_args[param_name] = serialize_value(arg)
-            for k, v in kwargs.items():
-                combined_args[str(k)] = serialize_value(v)
-
-            span_attributes = {
-                "__autotraced__": True,
-                "__class__": class_name,
-                "__method__": method_name,
-                "__type__": span_type,
-                "__args__": str(combined_args),
-            }
-
-            return class_name, method_name, span_attributes
-
-        @wraps(method)
-        async def async_gen_wrapper(self: Any, *args: Any, **kwargs: Any) -> AsyncGenerator:
-            from llama_stack.providers.utils.telemetry import tracing
-
-            class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
-
-            with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
-                try:
-                    count = 0
-                    async for item in method(self, *args, **kwargs):
-                        yield item
-                        count += 1
-                finally:
-                    span.set_attribute("chunk_count", count)
-
-        @wraps(method)
-        async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
-            from llama_stack.providers.utils.telemetry import tracing
-
-            class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
-
-            with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
-                try:
-                    result = await method(self, *args, **kwargs)
-                    span.set_attribute("output", serialize_value(result))
-                    return result
-                except Exception as e:
-                    span.set_attribute("error", str(e))
-                    raise
-
-        @wraps(method)
-        def sync_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
-            from llama_stack.providers.utils.telemetry import tracing
-
-            class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
-
-            with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
-                try:
-                    result = method(self, *args, **kwargs)
-                    span.set_attribute("output", serialize_value(result))
-                    return result
-                except Exception as e:
-                    span.set_attribute("error", str(e))
-                    raise
-
-        if is_async_gen:
-            return async_gen_wrapper
-        elif is_async:
-            return async_wrapper
-        else:
-            return sync_wrapper
-
-    original_init_subclass = getattr(cls, "__init_subclass__", None)
-
-    def __init_subclass__(cls_child, **kwargs):  # noqa: N807
-        if original_init_subclass:
-            original_init_subclass(**kwargs)
-
-        for name, method in vars(cls_child).items():
-            if inspect.isfunction(method) and not name.startswith("_"):
-                setattr(cls_child, name, trace_method(method))  # noqa: B010
-
-    cls.__init_subclass__ = classmethod(__init_subclass__)
-
-    return cls
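For reference, the decorator deleted above attached a fixed set of attributes to every span it opened. The snippet below is a self-contained sketch of that attribute construction, mirroring create_span_context from the removed file; the FakeModels class and the call at the bottom are invented purely to show the shape of the output.

import inspect
from typing import Any


def build_span_attributes(obj: Any, method: Any, args: tuple, kwargs: dict) -> dict:
    # Mirrors the deleted create_span_context: map positional args to parameter
    # names (skipping 'self'), stringify everything, and tag the span as autotraced.
    param_names = list(inspect.signature(method).parameters.keys())[1:]
    combined = {}
    for i, arg in enumerate(args):
        name = param_names[i] if i < len(param_names) else f"position_{i + 1}"
        combined[name] = str(arg)
    for k, v in kwargs.items():
        combined[str(k)] = str(v)
    return {
        "__autotraced__": True,
        "__class__": obj.__class__.__name__,
        "__method__": method.__name__,
        "__type__": "async" if inspect.iscoroutinefunction(method) else "sync",
        "__args__": str(combined),
    }


class FakeModels:  # invented stand-in for a concrete protocol implementation
    async def get_model(self, model_id: str) -> dict:
        return {"identifier": model_id}


print(build_span_attributes(FakeModels(), FakeModels.get_model, ("llama-3",), {}))
# {'__autotraced__': True, '__class__': 'FakeModels', '__method__': 'get_model',
#  '__type__': 'async', '__args__': "{'model_id': 'llama-3'}"}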
@@ -29,7 +29,6 @@ from llama_stack.apis.telemetry import (
     UnstructuredLogEvent,
 )
 from llama_stack.log import get_logger
-from llama_stack.providers.utils.telemetry.trace_protocol import serialize_value

 logger = get_logger(__name__, category="core")

@@ -122,7 +122,6 @@ def _prepare_for_json(value: Any) -> str:
            return str(value)


-@patch("llama_stack.providers.utils.telemetry.trace_protocol._prepare_for_json", new=_prepare_for_json)
 async def test_qdrant_register_and_unregister_vector_db(
     qdrant_adapter: QdrantVectorIOAdapter,
     mock_vector_db,