mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 09:53:45 +00:00
feat: remove core.telemetry as a dependency of llama_stack.apis
Remove circular dependency by moving tracing from API protocol definitions to router implementation layer. Changes: - Create apis/common/tracing.py with marker decorator (zero core dependencies) - Add @trace_protocol marker decorator to 11 protocol classes - Apply actual tracing in core/routers/__init__.py based on protocol marker - Move MetricResponseMixin from core to apis (it's an API response type) - APIs package is now self-contained with zero core dependencies The tracing functionality remains identical - actual trace_protocol from core is applied to router implementations at runtime when both telemetry is enabled and the protocol has the __trace_protocol__ marker. ## Test Plan Manual integration test confirms identical behavior to main branch: ```bash llama stack list-deps --format uv starter | sh export OLLAMA_URL=http://localhost:11434 llama stack run starter curl -X POST http://localhost:8321/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{"model": "ollama/gpt-oss:20b", "messages": [{"role": "user", "content": "Say hello"}], "max_tokens": 10}' Verified identical between main and this branch: - trace_id present in response - metrics array with prompt_tokens, completion_tokens, total_tokens - Server logs show trace_protocol applied to all routers Existing telemetry integration tests (tests/integration/telemetry/) validate trace context propagation and span attributes. Signed-off-by: Charlie Doern <cdoern@redhat.com>
This commit is contained in:
parent
a8a8aa56c0
commit
d9f6815d62
14 changed files with 81 additions and 53 deletions
|
|
@ -34,3 +34,44 @@ class PaginatedResponse(BaseModel):
|
|||
data: list[dict[str, Any]]
|
||||
has_more: bool
|
||||
url: str | None = None
|
||||
|
||||
|
||||
# This is a short term solution to allow inference API to return metrics
|
||||
# The ideal way to do this is to have a way for all response types to include metrics
|
||||
# and all metric events logged to the telemetry API to be included with the response
|
||||
# To do this, we will need to augment all response types with a metrics field.
|
||||
# We have hit a blocker from stainless SDK that prevents us from doing this.
|
||||
# The blocker is that if we were to augment the response types that have a data field
|
||||
# in them like so
|
||||
# class ListModelsResponse(BaseModel):
|
||||
# metrics: Optional[List[MetricEvent]] = None
|
||||
# data: List[Models]
|
||||
# ...
|
||||
# The client SDK will need to access the data by using a .data field, which is not
|
||||
# ergonomic. Stainless SDK does support unwrapping the response type, but it
|
||||
# requires that the response type to only have a single field.
|
||||
|
||||
# We will need a way in the client SDK to signal that the metrics are needed
|
||||
# and if they are needed, the client SDK has to return the full response type
|
||||
# without unwrapping it.
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class MetricInResponse(BaseModel):
|
||||
"""A metric value included in API responses.
|
||||
:param metric: The name of the metric
|
||||
:param value: The numeric value of the metric
|
||||
:param unit: (Optional) The unit of measurement for the metric value
|
||||
"""
|
||||
|
||||
metric: str
|
||||
value: int | float
|
||||
unit: str | None = None
|
||||
|
||||
|
||||
class MetricResponseMixin(BaseModel):
|
||||
"""Mixin class for API responses that can include metrics.
|
||||
:param metrics: (Optional) List of metrics associated with the API response
|
||||
"""
|
||||
|
||||
metrics: list[MetricInResponse] | None = None
|
||||
|
|
|
|||
22
src/llama_stack/apis/common/tracing.py
Normal file
22
src/llama_stack/apis/common/tracing.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
def trace_protocol(cls):
|
||||
"""
|
||||
Mark a protocol for automatic tracing when telemetry is enabled.
|
||||
|
||||
This is a metadata-only decorator with no dependencies on core.
|
||||
Actual tracing is applied by core routers at runtime if telemetry is enabled.
|
||||
|
||||
Usage:
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class MyProtocol(Protocol):
|
||||
...
|
||||
"""
|
||||
cls.__trace_protocol__ = True
|
||||
return cls
|
||||
|
|
@ -20,8 +20,8 @@ from llama_stack.apis.agents.openai_responses import (
|
|||
OpenAIResponseOutputMessageMCPListTools,
|
||||
OpenAIResponseOutputMessageWebSearchToolCall,
|
||||
)
|
||||
from llama_stack.apis.common.tracing import trace_protocol
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.core.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
Metadata = dict[str, str]
|
||||
|
|
|
|||
|
|
@ -11,8 +11,8 @@ from fastapi import File, Form, Response, UploadFile
|
|||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.common.responses import Order
|
||||
from llama_stack.apis.common.tracing import trace_protocol
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.core.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -19,11 +19,10 @@ from pydantic import BaseModel, Field, field_validator
|
|||
from typing_extensions import TypedDict
|
||||
|
||||
from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent
|
||||
from llama_stack.apis.common.responses import Order
|
||||
from llama_stack.apis.common.responses import MetricResponseMixin, Order
|
||||
from llama_stack.apis.common.tracing import trace_protocol
|
||||
from llama_stack.apis.models import Model
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
|
||||
from llama_stack.core.telemetry.telemetry import MetricResponseMixin
|
||||
from llama_stack.core.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.models.llama.datatypes import (
|
||||
BuiltinTool,
|
||||
StopReason,
|
||||
|
|
|
|||
|
|
@ -9,9 +9,9 @@ from typing import Any, Literal, Protocol, runtime_checkable
|
|||
|
||||
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
||||
|
||||
from llama_stack.apis.common.tracing import trace_protocol
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.core.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@ from typing import Protocol, runtime_checkable
|
|||
|
||||
from pydantic import BaseModel, Field, field_validator, model_validator
|
||||
|
||||
from llama_stack.apis.common.tracing import trace_protocol
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.core.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -9,10 +9,10 @@ from typing import Any, Protocol, runtime_checkable
|
|||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.common.tracing import trace_protocol
|
||||
from llama_stack.apis.inference import OpenAIMessageParam
|
||||
from llama_stack.apis.shields import Shield
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.core.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -8,9 +8,9 @@ from typing import Any, Literal, Protocol, runtime_checkable
|
|||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.common.tracing import trace_protocol
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.core.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -11,8 +11,8 @@ from pydantic import BaseModel, Field, field_validator
|
|||
from typing_extensions import runtime_checkable
|
||||
|
||||
from llama_stack.apis.common.content_types import URL, InterleavedContent
|
||||
from llama_stack.apis.common.tracing import trace_protocol
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.core.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -11,9 +11,9 @@ from pydantic import BaseModel
|
|||
from typing_extensions import runtime_checkable
|
||||
|
||||
from llama_stack.apis.common.content_types import URL, InterleavedContent
|
||||
from llama_stack.apis.common.tracing import trace_protocol
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.core.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
from .rag_tool import RAGToolRuntime
|
||||
|
|
|
|||
|
|
@ -13,10 +13,10 @@ from typing import Annotated, Any, Literal, Protocol, runtime_checkable
|
|||
from fastapi import Body
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.common.tracing import trace_protocol
|
||||
from llama_stack.apis.inference import InterleavedContent
|
||||
from llama_stack.apis.vector_stores import VectorStore
|
||||
from llama_stack.apis.version import LLAMA_STACK_API_V1
|
||||
from llama_stack.core.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
from llama_stack.strong_typing.schema import register_schema
|
||||
|
||||
|
|
|
|||
|
|
@ -93,4 +93,11 @@ async def get_auto_router_impl(
|
|||
|
||||
impl = api_to_routers[api.value](routing_table, **api_to_dep_impl)
|
||||
await impl.initialize()
|
||||
|
||||
# Apply tracing to router implementation if telemetry is enabled and protocol wants tracing
|
||||
if run_config.telemetry.enabled and getattr(impl.__class__, "__trace_protocol__", False):
|
||||
from llama_stack.core.telemetry.trace_protocol import trace_protocol
|
||||
|
||||
trace_protocol(impl.__class__)
|
||||
|
||||
return impl
|
||||
|
|
|
|||
|
|
@ -163,47 +163,6 @@ class MetricEvent(EventCommon):
|
|||
unit: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class MetricInResponse(BaseModel):
|
||||
"""A metric value included in API responses.
|
||||
:param metric: The name of the metric
|
||||
:param value: The numeric value of the metric
|
||||
:param unit: (Optional) The unit of measurement for the metric value
|
||||
"""
|
||||
|
||||
metric: str
|
||||
value: int | float
|
||||
unit: str | None = None
|
||||
|
||||
|
||||
# This is a short term solution to allow inference API to return metrics
|
||||
# The ideal way to do this is to have a way for all response types to include metrics
|
||||
# and all metric events logged to the telemetry API to be included with the response
|
||||
# To do this, we will need to augment all response types with a metrics field.
|
||||
# We have hit a blocker from stainless SDK that prevents us from doing this.
|
||||
# The blocker is that if we were to augment the response types that have a data field
|
||||
# in them like so
|
||||
# class ListModelsResponse(BaseModel):
|
||||
# metrics: Optional[List[MetricEvent]] = None
|
||||
# data: List[Models]
|
||||
# ...
|
||||
# The client SDK will need to access the data by using a .data field, which is not
|
||||
# ergonomic. Stainless SDK does support unwrapping the response type, but it
|
||||
# requires that the response type to only have a single field.
|
||||
|
||||
# We will need a way in the client SDK to signal that the metrics are needed
|
||||
# and if they are needed, the client SDK has to return the full response type
|
||||
# without unwrapping it.
|
||||
|
||||
|
||||
class MetricResponseMixin(BaseModel):
|
||||
"""Mixin class for API responses that can include metrics.
|
||||
:param metrics: (Optional) List of metrics associated with the API response
|
||||
"""
|
||||
|
||||
metrics: list[MetricInResponse] | None = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class StructuredLogType(Enum):
|
||||
"""The type of structured log event payload.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue