chore(telemetry): code cleanup

# What does this PR do?

Consolidates telemetry into `llama_stack.core.telemetry` and removes it from the provider surface:

- `trace_protocol`, `tracing`, and the OpenTelemetry-backed `Telemetry` implementation move out of `llama_stack.providers.utils.telemetry` and `llama_stack.providers.inline.telemetry.meta_reference`; the inline provider package (its `__init__.py`, `TelemetryConfig`/`TelemetrySink` config, and `get_provider_impl`) is deleted.
- Telemetry is no longer a providable or internal API: it is dropped from `api_protocol_map` and `INTERNAL_APIS`, and the auto-router no longer injects a telemetry impl. `InferenceRouter` now takes a `telemetry_enabled: bool` flag instead of a `Telemetry` dependency.
- The server and the library client call `setup_logger(Telemetry())` directly when `config.telemetry.enabled` is set.
- `LoggingConfig` moves from `llama_stack.core.datatypes` to `llama_stack.log`.
- Import paths across APIs, routers, agents, providers, and tests are updated accordingly (see the sketch below).
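For downstream code the visible change is the import path. A minimal before/after sketch, assuming the re-exports from the new `llama_stack/core/telemetry/__init__.py` shown further down in this diff:

```python
# Before: telemetry helpers lived under the providers tree.
# from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
# from llama_stack.providers.utils.telemetry.tracing import end_trace, get_current_span, start_trace

# After: the same names are importable from the core package (re-exported in __init__.py).
from llama_stack.core.telemetry import end_trace, get_current_span, start_trace, trace_protocol
```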
## Test Plan

Author: Eric Huang
Date: 2025-10-23 15:25:30 -07:00
Parent: d12e5f0999
Commit: 0962228c73
36 changed files with 90 additions and 179 deletions

@@ -21,7 +21,7 @@ from llama_stack.apis.agents.openai_responses import (
     OpenAIResponseOutputMessageWebSearchToolCall,
 )
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+from llama_stack.core.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod

 Metadata = dict[str, str]

@@ -12,7 +12,7 @@ from pydantic import BaseModel, Field
 from llama_stack.apis.common.responses import Order
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+from llama_stack.core.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod

@@ -23,6 +23,7 @@ from llama_stack.apis.common.responses import Order
 from llama_stack.apis.models import Model
 from llama_stack.apis.telemetry import MetricResponseMixin
 from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
+from llama_stack.core.telemetry.trace_protocol import trace_protocol
 from llama_stack.models.llama.datatypes import (
     BuiltinTool,
     StopReason,
@@ -30,7 +31,6 @@ from llama_stack.models.llama.datatypes import (
     ToolDefinition,
     ToolPromptFormat,
 )
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod

 register_schema(ToolCall)

@@ -11,7 +11,7 @@ from pydantic import BaseModel, ConfigDict, Field, field_validator
 from llama_stack.apis.resource import Resource, ResourceType
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+from llama_stack.core.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod

@@ -11,7 +11,7 @@ from typing import Protocol, runtime_checkable
 from pydantic import BaseModel, Field, field_validator, model_validator

 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+from llama_stack.core.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod

@@ -12,7 +12,7 @@ from pydantic import BaseModel, Field
 from llama_stack.apis.inference import OpenAIMessageParam
 from llama_stack.apis.shields import Shield
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+from llama_stack.core.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod

@@ -10,7 +10,7 @@ from pydantic import BaseModel
 from llama_stack.apis.resource import Resource, ResourceType
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+from llama_stack.core.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod

@@ -12,7 +12,7 @@ from typing_extensions import runtime_checkable
 from llama_stack.apis.common.content_types import URL, InterleavedContent
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+from llama_stack.core.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod

@@ -13,7 +13,7 @@ from typing_extensions import runtime_checkable
 from llama_stack.apis.common.content_types import URL, InterleavedContent
 from llama_stack.apis.resource import Resource, ResourceType
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+from llama_stack.core.telemetry.trace_protocol import trace_protocol
 from llama_stack.schema_utils import json_schema_type, webmethod

 from .rag_tool import RAGToolRuntime

@@ -17,7 +17,7 @@ from pydantic import BaseModel, Field
 from llama_stack.apis.inference import InterleavedContent
 from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+from llama_stack.core.telemetry.trace_protocol import trace_protocol
 from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
 from llama_stack.schema_utils import json_schema_type, webmethod
 from llama_stack.strong_typing.schema import register_schema

@@ -15,10 +15,10 @@ import yaml
 from llama_stack.cli.stack.utils import ImageType
 from llama_stack.cli.subcommand import Subcommand
-from llama_stack.core.datatypes import LoggingConfig, StackRunConfig
+from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
 from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
-from llama_stack.log import get_logger
+from llama_stack.log import LoggingConfig, get_logger

 REPO_ROOT = Path(__file__).parent.parent.parent.parent

@@ -31,6 +31,7 @@ from llama_stack.core.storage.datatypes import (
     StorageBackendType,
     StorageConfig,
 )
+from llama_stack.log import LoggingConfig
 from llama_stack.providers.datatypes import Api, ProviderSpec

 LLAMA_STACK_BUILD_CONFIG_VERSION = 2
@@ -195,14 +196,6 @@ class TelemetryConfig(BaseModel):
     enabled: bool = Field(default=False, description="enable or disable telemetry")

-class LoggingConfig(BaseModel):
-    category_levels: dict[str, str] = Field(
-        default_factory=dict,
-        description="""
- Dictionary of different logging configurations for different portions (ex: core, server) of llama stack""",
-    )

 class OAuth2JWKSConfig(BaseModel):
     # The JWKS URI for collecting public keys
     uri: str

@@ -25,7 +25,7 @@ from llama_stack.providers.datatypes import (
 logger = get_logger(name=__name__, category="core")

-INTERNAL_APIS = {Api.inspect, Api.providers, Api.prompts, Api.conversations, Api.telemetry}
+INTERNAL_APIS = {Api.inspect, Api.providers, Api.prompts, Api.conversations}

 def stack_apis() -> list[Api]:

@@ -44,11 +44,12 @@ from llama_stack.core.stack import (
     get_stack_run_config_from_distro,
     replace_env_vars,
 )
+from llama_stack.core.telemetry import Telemetry
+from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT, end_trace, setup_logger, start_trace
 from llama_stack.core.utils.config import redact_sensitive_fields
 from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.core.utils.exec import in_notebook
 from llama_stack.log import get_logger, setup_logging
-from llama_stack.providers.utils.telemetry.tracing import CURRENT_TRACE_CONTEXT, end_trace, setup_logger, start_trace
 from llama_stack.strong_typing.inspection import is_unwrapped_body_param

 logger = get_logger(name=__name__, category="core")
@@ -293,8 +294,8 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
                 raise _e

         assert self.impls is not None
-        if Api.telemetry in self.impls:
-            setup_logger(self.impls[Api.telemetry])
+        if self.config.telemetry.enabled:
+            setup_logger(Telemetry())

         if not os.environ.get("PYTEST_CURRENT_TEST"):
             console = Console()

@@ -27,7 +27,6 @@ from llama_stack.apis.safety import Safety
 from llama_stack.apis.scoring import Scoring
 from llama_stack.apis.scoring_functions import ScoringFunctions
 from llama_stack.apis.shields import Shields
-from llama_stack.apis.telemetry import Telemetry
 from llama_stack.apis.tools import ToolGroups, ToolRuntime
 from llama_stack.apis.vector_io import VectorIO
 from llama_stack.apis.vector_stores import VectorStore
@@ -98,7 +97,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
         Api.files: Files,
         Api.prompts: Prompts,
         Api.conversations: Conversations,
-        Api.telemetry: Telemetry,
+        # Api.telemetry: Telemetry,
     }

     if external_apis:
@@ -241,24 +240,6 @@ def validate_and_prepare_providers(
             key = api_str if api not in router_apis else f"inner-{api_str}"
             providers_with_specs[key] = specs

-    # TODO: remove this logic, telemetry should not have providers.
-    # if telemetry has been enabled in the config initialize our internal impl
-    # telemetry is not an external API so it SHOULD NOT be auto-routed.
-    if run_config.telemetry.enabled:
-        specs = {}
-        p = InlineProviderSpec(
-            api=Api.telemetry,
-            provider_type="inline::meta-reference",
-            pip_packages=[],
-            optional_api_dependencies=[Api.datasetio],
-            module="llama_stack.providers.inline.telemetry.meta_reference",
-            config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig",
-            description="Meta's reference implementation of telemetry and observability using OpenTelemetry.",
-        )
-        spec = ProviderWithSpec(spec=p, provider_type="inline::meta-reference", provider_id="meta-reference")
-        specs["meta-reference"] = spec
-        providers_with_specs["telemetry"] = specs
     return providers_with_specs

@@ -72,14 +72,6 @@ async def get_auto_router_impl(
         raise ValueError(f"API {api.value} not found in router map")

     api_to_dep_impl = {}
-    if run_config.telemetry.enabled:
-        api_to_deps = {
-            "inference": {"telemetry": Api.telemetry},
-        }
-        for dep_name, dep_api in api_to_deps.get(api.value, {}).items():
-            if dep_api in deps:
-                api_to_dep_impl[dep_name] = deps[dep_api]

     # TODO: move pass configs to routers instead
     if api == Api.inference:
         inference_ref = run_config.storage.stores.inference
@@ -92,6 +84,7 @@ async def get_auto_router_impl(
         )
         await inference_store.initialize()
         api_to_dep_impl["store"] = inference_store
+        api_to_dep_impl["telemetry_enabled"] = run_config.telemetry.enabled
     elif api == Api.vector_io:
         api_to_dep_impl["vector_stores_config"] = run_config.vector_stores

@@ -53,13 +53,13 @@ from llama_stack.apis.inference.inference import (
     OpenAIChatCompletionContentPartTextParam,
 )
 from llama_stack.apis.models import Model, ModelType
-from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry
+from llama_stack.apis.telemetry import MetricEvent, MetricInResponse
+from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span
 from llama_stack.log import get_logger
 from llama_stack.models.llama.llama3.chat_format import ChatFormat
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
 from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
-from llama_stack.providers.utils.telemetry.tracing import enqueue_event, get_current_span

 logger = get_logger(name=__name__, category="core::routers")
@@ -70,14 +70,14 @@ class InferenceRouter(Inference):
     def __init__(
         self,
         routing_table: RoutingTable,
-        telemetry: Telemetry | None = None,
         store: InferenceStore | None = None,
+        telemetry_enabled: bool = False,
     ) -> None:
         logger.debug("Initializing InferenceRouter")
         self.routing_table = routing_table
-        self.telemetry = telemetry
+        self.telemetry_enabled = telemetry_enabled
         self.store = store
-        if self.telemetry:
+        if self.telemetry_enabled:
             self.tokenizer = Tokenizer.get_instance()
             self.formatter = ChatFormat(self.tokenizer)
@@ -159,7 +159,7 @@ class InferenceRouter(Inference):
         model: Model,
     ) -> list[MetricInResponse]:
         metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model)
-        if self.telemetry:
+        if self.telemetry_enabled:
             for metric in metrics:
                 enqueue_event(metric)
         return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics]
@@ -223,7 +223,7 @@ class InferenceRouter(Inference):
         # that we do not return an AsyncIterator, our tests expect a stream of chunks we cannot intercept currently.
         response = await provider.openai_completion(params)
-        if self.telemetry:
+        if self.telemetry_enabled:
             metrics = self._construct_metrics(
                 prompt_tokens=response.usage.prompt_tokens,
                 completion_tokens=response.usage.completion_tokens,
@@ -285,7 +285,7 @@
         if self.store:
             asyncio.create_task(self.store.store_chat_completion(response, params.messages))
-        if self.telemetry:
+        if self.telemetry_enabled:
             metrics = self._construct_metrics(
                 prompt_tokens=response.usage.prompt_tokens,
                 completion_tokens=response.usage.completion_tokens,
@@ -393,7 +393,7 @@
             else:
                 if hasattr(chunk, "delta"):
                     completion_text += chunk.delta
-                if hasattr(chunk, "stop_reason") and chunk.stop_reason and self.telemetry:
+                if hasattr(chunk, "stop_reason") and chunk.stop_reason and self.telemetry_enabled:
                     complete = True
                     completion_tokens = await self._count_tokens(completion_text)
         # if we are done receiving tokens
@@ -401,7 +401,7 @@
             total_tokens = (prompt_tokens or 0) + (completion_tokens or 0)

             # Create a separate span for streaming completion metrics
-            if self.telemetry:
+            if self.telemetry_enabled:
                 # Log metrics in the new span context
                 completion_metrics = self._construct_metrics(
                     prompt_tokens=prompt_tokens,
@@ -450,7 +450,7 @@
             total_tokens = (prompt_tokens or 0) + (completion_tokens or 0)

             # Create a separate span for completion metrics
-            if self.telemetry:
+            if self.telemetry_enabled:
                 # Log metrics in the new span context
                 completion_metrics = self._construct_metrics(
                     prompt_tokens=prompt_tokens,
@@ -548,7 +548,7 @@
             completion_text += "".join(choice_data["content_parts"])

         # Add metrics to the chunk
-        if self.telemetry and hasattr(chunk, "usage") and chunk.usage:
+        if self.telemetry_enabled and hasattr(chunk, "usage") and chunk.usage:
             metrics = self._construct_metrics(
                 prompt_tokens=chunk.usage.prompt_tokens,
                 completion_tokens=chunk.usage.completion_tokens,
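The hunks above swap the injected `Telemetry` implementation for a plain boolean. A standalone toy sketch of the gating pattern (not the real `InferenceRouter`; the metric names and shapes here are illustrative only):

```python
from dataclasses import dataclass, field


@dataclass
class ToyInferenceRouter:
    """Illustrative stand-in: holds a bool instead of a telemetry implementation."""

    telemetry_enabled: bool = False
    emitted: list[dict] = field(default_factory=list)

    def _construct_metrics(self, prompt_tokens: int, completion_tokens: int) -> list[dict]:
        # The real router also attaches model/provider attributes; omitted here.
        return [
            {"metric": "prompt_tokens", "value": prompt_tokens},
            {"metric": "completion_tokens", "value": completion_tokens},
            {"metric": "total_tokens", "value": prompt_tokens + completion_tokens},
        ]

    def record_usage(self, prompt_tokens: int, completion_tokens: int) -> None:
        if self.telemetry_enabled:  # mirrors `if self.telemetry_enabled:` in the hunks above
            for metric in self._construct_metrics(prompt_tokens, completion_tokens):
                self.emitted.append(metric)  # the real router calls enqueue_event(metric)


router = ToyInferenceRouter(telemetry_enabled=True)
router.record_usage(prompt_tokens=12, completion_tokens=34)
assert [m["metric"] for m in router.emitted] == ["prompt_tokens", "completion_tokens", "total_tokens"]
```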

@@ -36,7 +36,6 @@ from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.core.access_control.access_control import AccessDeniedError
 from llama_stack.core.datatypes import (
     AuthenticationRequiredError,
-    LoggingConfig,
     StackRunConfig,
     process_cors_config,
 )
@@ -53,19 +52,13 @@ from llama_stack.core.stack import (
     cast_image_name_to_string,
     replace_env_vars,
 )
+from llama_stack.core.telemetry import Telemetry
+from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT, setup_logger
 from llama_stack.core.utils.config import redact_sensitive_fields
 from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
 from llama_stack.core.utils.context import preserve_contexts_async_generator
-from llama_stack.log import get_logger, setup_logging
+from llama_stack.log import LoggingConfig, get_logger, setup_logging
 from llama_stack.providers.datatypes import Api
-from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig
-from llama_stack.providers.inline.telemetry.meta_reference.telemetry import (
-    TelemetryAdapter,
-)
-from llama_stack.providers.utils.telemetry.tracing import (
-    CURRENT_TRACE_CONTEXT,
-    setup_logger,
-)

 from .auth import AuthenticationMiddleware
 from .quota import QuotaMiddleware
@@ -451,9 +444,7 @@ def create_app() -> StackApp:
         app.add_middleware(CORSMiddleware, **cors_config.model_dump())

     if config.telemetry.enabled:
-        setup_logger(impls[Api.telemetry])
-    else:
-        setup_logger(TelemetryAdapter(TelemetryConfig(), {}))
+        setup_logger(Telemetry())

     # Load external APIs if configured
     external_apis = load_external_apis(config)

@@ -7,8 +7,8 @@ from aiohttp import hdrs

 from llama_stack.core.external import ExternalApiSpec
 from llama_stack.core.server.routes import find_matching_route, initialize_route_impls
+from llama_stack.core.telemetry.tracing import end_trace, start_trace
 from llama_stack.log import get_logger
-from llama_stack.providers.utils.telemetry.tracing import end_trace, start_trace

 logger = get_logger(name=__name__, category="core::server")

@@ -0,0 +1,26 @@
+from .telemetry import Telemetry
+from .trace_protocol import serialize_value, trace_protocol
+from .tracing import (
+    CURRENT_TRACE_CONTEXT,
+    ROOT_SPAN_MARKERS,
+    end_trace,
+    enqueue_event,
+    get_current_span,
+    setup_logger,
+    span,
+    start_trace,
+)
+
+__all__ = [
+    "Telemetry",
+    "trace_protocol",
+    "serialize_value",
+    "CURRENT_TRACE_CONTEXT",
+    "ROOT_SPAN_MARKERS",
+    "end_trace",
+    "enqueue_event",
+    "get_current_span",
+    "setup_logger",
+    "span",
+    "start_trace",
+]
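Callers can now pull everything from the package root. A hedged sketch of decorating a protocol the way the API modules earlier in this diff do; the `ExampleAPI` protocol itself is made up:

```python
from typing import Protocol, runtime_checkable

from llama_stack.core.telemetry import trace_protocol  # re-exported by the new __init__.py


@runtime_checkable
@trace_protocol  # marks the protocol so calls to its methods are recorded as spans
class ExampleAPI(Protocol):  # hypothetical protocol, not part of llama_stack
    async def ping(self, message: str) -> str: ...
```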

@@ -24,14 +24,13 @@ from llama_stack.apis.telemetry import (
     SpanStartPayload,
     SpanStatus,
     StructuredLogEvent,
-    Telemetry,
     UnstructuredLogEvent,
 )
-from llama_stack.core.datatypes import Api
+from llama_stack.apis.telemetry import (
+    Telemetry as TelemetryBase,
+)
+from llama_stack.core.telemetry.tracing import ROOT_SPAN_MARKERS
 from llama_stack.log import get_logger
-from llama_stack.providers.utils.telemetry.tracing import ROOT_SPAN_MARKERS
-
-from .config import TelemetryConfig

 _GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
     "active_spans": {},
@@ -50,9 +49,8 @@ def is_tracing_enabled(tracer):
         return span.is_recording()

-class TelemetryAdapter(Telemetry):
-    def __init__(self, _config: TelemetryConfig, deps: dict[Api, Any]) -> None:
-        self.datasetio_api = deps.get(Api.datasetio)
+class Telemetry(TelemetryBase):
+    def __init__(self) -> None:
         self.meter = None

         global _TRACER_PROVIDER

@@ -77,7 +77,7 @@ def trace_protocol[T](cls: type[T]) -> type[T]:
         @wraps(method)
         async def async_gen_wrapper(self: Any, *args: Any, **kwargs: Any) -> AsyncGenerator:
-            from llama_stack.providers.utils.telemetry import tracing
+            from llama_stack.core.telemetry import tracing

             class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
@@ -92,7 +92,7 @@
         @wraps(method)
         async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
-            from llama_stack.providers.utils.telemetry import tracing
+            from llama_stack.core.telemetry import tracing

             class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
@@ -107,7 +107,7 @@
         @wraps(method)
         def sync_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
-            from llama_stack.providers.utils.telemetry import tracing
+            from llama_stack.core.telemetry import tracing

             class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)

@@ -28,8 +28,8 @@ from llama_stack.apis.telemetry import (
     Telemetry,
     UnstructuredLogEvent,
 )
+from llama_stack.core.telemetry.trace_protocol import serialize_value
 from llama_stack.log import get_logger
-from llama_stack.providers.utils.telemetry.trace_protocol import serialize_value

 logger = get_logger(__name__, category="core")

@@ -9,15 +9,23 @@ import os
 import re
 from logging.config import dictConfig  # allow-direct-logging

+from pydantic import BaseModel, Field
 from rich.console import Console
 from rich.errors import MarkupError
 from rich.logging import RichHandler

-from llama_stack.core.datatypes import LoggingConfig
-
 # Default log level
 DEFAULT_LOG_LEVEL = logging.INFO

+class LoggingConfig(BaseModel):
+    category_levels: dict[str, str] = Field(
+        default_factory=dict,
+        description="""
+ Dictionary of different logging configurations for different portions (ex: core, server) of llama stack""",
+    )
+
 # Predefined categories
 CATEGORIES = [
     "core",

@@ -67,6 +67,7 @@ from llama_stack.apis.safety import Safety
 from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime
 from llama_stack.apis.vector_io import VectorIO
 from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.telemetry import tracing
 from llama_stack.log import get_logger
 from llama_stack.models.llama.datatypes import (
     BuiltinTool,
@@ -78,7 +79,6 @@ from llama_stack.providers.utils.inference.openai_compat import (
     convert_tooldef_to_openai_tool,
 )
 from llama_stack.providers.utils.kvstore import KVStore
-from llama_stack.providers.utils.telemetry import tracing

 from .persistence import AgentPersistence
 from .safety import SafetyException, ShieldRunnerMixin

@@ -65,9 +65,9 @@ from llama_stack.apis.inference import (
     OpenAIChoice,
     OpenAIMessageParam,
 )
+from llama_stack.core.telemetry import tracing
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
-from llama_stack.providers.utils.telemetry import tracing

 from .types import ChatCompletionContext, ChatCompletionResult
 from .utils import (

@@ -37,8 +37,8 @@ from llama_stack.apis.inference import (
 )
 from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime
 from llama_stack.apis.vector_io import VectorIO
+from llama_stack.core.telemetry import tracing
 from llama_stack.log import get_logger
-from llama_stack.providers.utils.telemetry import tracing

 from .types import ChatCompletionContext, ToolExecutionResult

@@ -8,8 +8,8 @@ import asyncio

 from llama_stack.apis.inference import Message
 from llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel
+from llama_stack.core.telemetry import tracing
 from llama_stack.log import get_logger
-from llama_stack.providers.utils.telemetry import tracing

 log = get_logger(name=__name__, category="agents::meta_reference")

@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.

@@ -1,21 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from typing import Any
-
-from llama_stack.core.datatypes import Api
-
-from .config import TelemetryConfig, TelemetrySink
-
-__all__ = ["TelemetryConfig", "TelemetrySink"]
-
-
-async def get_provider_impl(config: TelemetryConfig, deps: dict[Api, Any]):
-    from .telemetry import TelemetryAdapter
-
-    impl = TelemetryAdapter(config, deps)
-    await impl.initialize()
-    return impl

@@ -1,47 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from enum import StrEnum
-from typing import Any
-
-from pydantic import BaseModel, Field, field_validator
-
-
-class TelemetrySink(StrEnum):
-    OTEL_TRACE = "otel_trace"
-    OTEL_METRIC = "otel_metric"
-    CONSOLE = "console"
-
-
-class TelemetryConfig(BaseModel):
-    otel_exporter_otlp_endpoint: str | None = Field(
-        default=None,
-        description="The OpenTelemetry collector endpoint URL (base URL for traces, metrics, and logs). If not set, the SDK will use OTEL_EXPORTER_OTLP_ENDPOINT environment variable.",
-    )
-    service_name: str = Field(
-        # service name is always the same, use zero-width space to avoid clutter
-        default="\u200b",
-        description="The service name to use for telemetry",
-    )
-    sinks: list[TelemetrySink] = Field(
-        default_factory=list,
-        description="List of telemetry sinks to enable (possible values: otel_trace, otel_metric, console)",
-    )
-
-    @field_validator("sinks", mode="before")
-    @classmethod
-    def validate_sinks(cls, v):
-        if isinstance(v, str):
-            return [TelemetrySink(sink.strip()) for sink in v.split(",")]
-        return v or []
-
-    @classmethod
-    def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
-        return {
-            "service_name": "${env.OTEL_SERVICE_NAME:=\u200b}",
-            "sinks": "${env.TELEMETRY_SINKS:=}",
-            "otel_exporter_otlp_endpoint": "${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}",
-        }

@@ -22,11 +22,11 @@ from llama_stack.apis.inference.inference import (
 )
 from llama_stack.apis.models import Model
 from llama_stack.apis.models.models import ModelType
+from llama_stack.core.telemetry.tracing import get_current_span
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
 from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
-from llama_stack.providers.utils.telemetry.tracing import get_current_span

 logger = get_logger(name=__name__, category="providers::remote::watsonx")

@@ -256,7 +256,7 @@ class LiteLLMOpenAIMixin(
         params: OpenAIChatCompletionRequestWithExtraBody,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
         # Add usage tracking for streaming when telemetry is active
-        from llama_stack.providers.utils.telemetry.tracing import get_current_span
+        from llama_stack.core.telemetry.tracing import get_current_span

         stream_options = params.stream_options
         if params.stream and get_current_span() is not None:

@@ -1,5 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.

@@ -23,7 +23,7 @@ from opentelemetry.sdk.trace import ReadableSpan, TracerProvider
 from opentelemetry.sdk.trace.export import SimpleSpanProcessor
 from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

-import llama_stack.providers.inline.telemetry.meta_reference.telemetry as telemetry_module
+import llama_stack.core.telemetry.telemetry as telemetry_module
 from llama_stack.testing.api_recorder import patch_httpx_for_test_id
 from tests.integration.fixtures.common import instantiate_llama_stack_client

@@ -196,8 +196,6 @@ class TestProviderRegistry:
             assert internal_api not in apis, f"Internal API {internal_api} should not be in providable_apis"

         for api in apis:
-            if api == Api.telemetry:
-                continue
             module_name = f"llama_stack.providers.registry.{api.name.lower()}"
             try:
                 importlib.import_module(module_name)