mirror of
				https://github.com/meta-llama/llama-stack.git
				synced 2025-10-25 01:01:13 +00:00 
			
		
		
		
	chore(telemetry): code cleanup (#3897)
	
		
			
	
		
	
	
		
	
		
			Some checks failed
		
		
	
	
		
			
				
	
				SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 0s
				
			
		
			
				
	
				Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
				
			
		
			
				
	
				Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
				
			
		
			
				
	
				Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 3s
				
			
		
			
				
	
				Python Package Build Test / build (3.12) (push) Failing after 2s
				
			
		
			
				
	
				SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 4s
				
			
		
			
				
	
				Python Package Build Test / build (3.13) (push) Failing after 3s
				
			
		
			
				
	
				Test External API and Providers / test-external (venv) (push) Failing after 4s
				
			
		
			
				
	
				Vector IO Integration Tests / test-matrix (push) Failing after 6s
				
			
		
			
				
	
				Unit Tests / unit-tests (3.12) (push) Failing after 4s
				
			
		
			
				
	
				Unit Tests / unit-tests (3.13) (push) Failing after 4s
				
			
		
			
				
	
				API Conformance Tests / check-schema-compatibility (push) Successful in 14s
				
			
		
			
				
	
				UI Tests / ui-tests (22) (push) Successful in 43s
				
			
		
			
				
	
				Pre-commit / pre-commit (push) Successful in 1m35s
				
			
		
		
	
	
				
					
				
			
		
			Some checks failed
		
		
	
	SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 0s
				
			Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
				
			Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
				
			Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 3s
				
			Python Package Build Test / build (3.12) (push) Failing after 2s
				
			SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 4s
				
			Python Package Build Test / build (3.13) (push) Failing after 3s
				
			Test External API and Providers / test-external (venv) (push) Failing after 4s
				
			Vector IO Integration Tests / test-matrix (push) Failing after 6s
				
			Unit Tests / unit-tests (3.12) (push) Failing after 4s
				
			Unit Tests / unit-tests (3.13) (push) Failing after 4s
				
			API Conformance Tests / check-schema-compatibility (push) Successful in 14s
				
			UI Tests / ui-tests (22) (push) Successful in 43s
				
			Pre-commit / pre-commit (push) Successful in 1m35s
				
			# What does this PR do? Clean up telemetry code since the telemetry API has been removed. - moved telemetry files out of providers to core - removed from Api ## Test Plan ❯ OTEL_SERVICE_NAME=llama_stack OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 uv run llama stack run starter ❯ curl http://localhost:8321/v1/chat/completions \ -H "Content-Type: application/json" \ -d '{ "model": "openai/gpt-4o-mini", "messages": [ { "role": "user", "content": "Hello!" } ] }' -> verify traces in Grafana CI
This commit is contained in:
		
							parent
							
								
									9916cb3b17
								
							
						
					
					
						commit
						8265d4efc8
					
				
					 37 changed files with 148 additions and 227 deletions
				
			
		|  | @ -21,7 +21,7 @@ from llama_stack.apis.agents.openai_responses import ( | |||
|     OpenAIResponseOutputMessageWebSearchToolCall, | ||||
| ) | ||||
| from llama_stack.apis.version import LLAMA_STACK_API_V1 | ||||
| from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.core.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.schema_utils import json_schema_type, register_schema, webmethod | ||||
| 
 | ||||
| Metadata = dict[str, str] | ||||
|  |  | |||
|  | @ -117,8 +117,6 @@ class Api(Enum, metaclass=DynamicApiMeta): | |||
|     post_training = "post_training" | ||||
|     tool_runtime = "tool_runtime" | ||||
| 
 | ||||
|     telemetry = "telemetry" | ||||
| 
 | ||||
|     models = "models" | ||||
|     shields = "shields" | ||||
|     vector_stores = "vector_stores"  # only used for routing table | ||||
|  |  | |||
|  | @ -12,7 +12,7 @@ from pydantic import BaseModel, Field | |||
| 
 | ||||
| from llama_stack.apis.common.responses import Order | ||||
| from llama_stack.apis.version import LLAMA_STACK_API_V1 | ||||
| from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.core.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.schema_utils import json_schema_type, webmethod | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -23,6 +23,7 @@ from llama_stack.apis.common.responses import Order | |||
| from llama_stack.apis.models import Model | ||||
| from llama_stack.apis.telemetry import MetricResponseMixin | ||||
| from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA | ||||
| from llama_stack.core.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.models.llama.datatypes import ( | ||||
|     BuiltinTool, | ||||
|     StopReason, | ||||
|  | @ -30,7 +31,6 @@ from llama_stack.models.llama.datatypes import ( | |||
|     ToolDefinition, | ||||
|     ToolPromptFormat, | ||||
| ) | ||||
| from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.schema_utils import json_schema_type, register_schema, webmethod | ||||
| 
 | ||||
| register_schema(ToolCall) | ||||
|  |  | |||
|  | @ -11,7 +11,7 @@ from pydantic import BaseModel, ConfigDict, Field, field_validator | |||
| 
 | ||||
| from llama_stack.apis.resource import Resource, ResourceType | ||||
| from llama_stack.apis.version import LLAMA_STACK_API_V1 | ||||
| from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.core.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.schema_utils import json_schema_type, webmethod | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -11,7 +11,7 @@ from typing import Protocol, runtime_checkable | |||
| from pydantic import BaseModel, Field, field_validator, model_validator | ||||
| 
 | ||||
| from llama_stack.apis.version import LLAMA_STACK_API_V1 | ||||
| from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.core.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.schema_utils import json_schema_type, webmethod | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -12,7 +12,7 @@ from pydantic import BaseModel, Field | |||
| from llama_stack.apis.inference import OpenAIMessageParam | ||||
| from llama_stack.apis.shields import Shield | ||||
| from llama_stack.apis.version import LLAMA_STACK_API_V1 | ||||
| from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.core.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.schema_utils import json_schema_type, webmethod | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -10,7 +10,7 @@ from pydantic import BaseModel | |||
| 
 | ||||
| from llama_stack.apis.resource import Resource, ResourceType | ||||
| from llama_stack.apis.version import LLAMA_STACK_API_V1 | ||||
| from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.core.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.schema_utils import json_schema_type, webmethod | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -12,7 +12,7 @@ from typing_extensions import runtime_checkable | |||
| 
 | ||||
| from llama_stack.apis.common.content_types import URL, InterleavedContent | ||||
| from llama_stack.apis.version import LLAMA_STACK_API_V1 | ||||
| from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.core.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.schema_utils import json_schema_type, register_schema, webmethod | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -13,7 +13,7 @@ from typing_extensions import runtime_checkable | |||
| from llama_stack.apis.common.content_types import URL, InterleavedContent | ||||
| from llama_stack.apis.resource import Resource, ResourceType | ||||
| from llama_stack.apis.version import LLAMA_STACK_API_V1 | ||||
| from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.core.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.schema_utils import json_schema_type, webmethod | ||||
| 
 | ||||
| from .rag_tool import RAGToolRuntime | ||||
|  |  | |||
|  | @ -17,7 +17,7 @@ from pydantic import BaseModel, Field | |||
| from llama_stack.apis.inference import InterleavedContent | ||||
| from llama_stack.apis.vector_stores import VectorStore | ||||
| from llama_stack.apis.version import LLAMA_STACK_API_V1 | ||||
| from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.core.telemetry.trace_protocol import trace_protocol | ||||
| from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id | ||||
| from llama_stack.schema_utils import json_schema_type, webmethod | ||||
| from llama_stack.strong_typing.schema import register_schema | ||||
|  |  | |||
|  | @ -15,10 +15,10 @@ import yaml | |||
| 
 | ||||
| from llama_stack.cli.stack.utils import ImageType | ||||
| from llama_stack.cli.subcommand import Subcommand | ||||
| from llama_stack.core.datatypes import LoggingConfig, StackRunConfig | ||||
| from llama_stack.core.datatypes import StackRunConfig | ||||
| from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars | ||||
| from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro | ||||
| from llama_stack.log import get_logger | ||||
| from llama_stack.log import LoggingConfig, get_logger | ||||
| 
 | ||||
| REPO_ROOT = Path(__file__).parent.parent.parent.parent | ||||
| 
 | ||||
|  |  | |||
|  | @ -31,6 +31,7 @@ from llama_stack.core.storage.datatypes import ( | |||
|     StorageBackendType, | ||||
|     StorageConfig, | ||||
| ) | ||||
| from llama_stack.log import LoggingConfig | ||||
| from llama_stack.providers.datatypes import Api, ProviderSpec | ||||
| 
 | ||||
| LLAMA_STACK_BUILD_CONFIG_VERSION = 2 | ||||
|  | @ -195,14 +196,6 @@ class TelemetryConfig(BaseModel): | |||
|     enabled: bool = Field(default=False, description="enable or disable telemetry") | ||||
| 
 | ||||
| 
 | ||||
| class LoggingConfig(BaseModel): | ||||
|     category_levels: dict[str, str] = Field( | ||||
|         default_factory=dict, | ||||
|         description=""" | ||||
|  Dictionary of different logging configurations for different portions (ex: core, server) of llama stack""", | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| class OAuth2JWKSConfig(BaseModel): | ||||
|     # The JWKS URI for collecting public keys | ||||
|     uri: str | ||||
|  |  | |||
|  | @ -25,7 +25,7 @@ from llama_stack.providers.datatypes import ( | |||
| logger = get_logger(name=__name__, category="core") | ||||
| 
 | ||||
| 
 | ||||
| INTERNAL_APIS = {Api.inspect, Api.providers, Api.prompts, Api.conversations, Api.telemetry} | ||||
| INTERNAL_APIS = {Api.inspect, Api.providers, Api.prompts, Api.conversations} | ||||
| 
 | ||||
| 
 | ||||
| def stack_apis() -> list[Api]: | ||||
|  |  | |||
|  | @ -32,7 +32,7 @@ from termcolor import cprint | |||
| 
 | ||||
| from llama_stack.core.build import print_pip_install_help | ||||
| from llama_stack.core.configure import parse_and_maybe_upgrade_config | ||||
| from llama_stack.core.datatypes import Api, BuildConfig, BuildProvider, DistributionSpec | ||||
| from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec | ||||
| from llama_stack.core.request_headers import ( | ||||
|     PROVIDER_DATA_VAR, | ||||
|     request_provider_data_context, | ||||
|  | @ -44,11 +44,12 @@ from llama_stack.core.stack import ( | |||
|     get_stack_run_config_from_distro, | ||||
|     replace_env_vars, | ||||
| ) | ||||
| from llama_stack.core.telemetry import Telemetry | ||||
| from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT, end_trace, setup_logger, start_trace | ||||
| from llama_stack.core.utils.config import redact_sensitive_fields | ||||
| from llama_stack.core.utils.context import preserve_contexts_async_generator | ||||
| from llama_stack.core.utils.exec import in_notebook | ||||
| from llama_stack.log import get_logger, setup_logging | ||||
| from llama_stack.providers.utils.telemetry.tracing import CURRENT_TRACE_CONTEXT, end_trace, setup_logger, start_trace | ||||
| from llama_stack.strong_typing.inspection import is_unwrapped_body_param | ||||
| 
 | ||||
| logger = get_logger(name=__name__, category="core") | ||||
|  | @ -293,8 +294,8 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): | |||
|             raise _e | ||||
| 
 | ||||
|         assert self.impls is not None | ||||
|         if Api.telemetry in self.impls: | ||||
|             setup_logger(self.impls[Api.telemetry]) | ||||
|         if self.config.telemetry.enabled: | ||||
|             setup_logger(Telemetry()) | ||||
| 
 | ||||
|         if not os.environ.get("PYTEST_CURRENT_TEST"): | ||||
|             console = Console() | ||||
|  |  | |||
|  | @ -27,7 +27,6 @@ from llama_stack.apis.safety import Safety | |||
| from llama_stack.apis.scoring import Scoring | ||||
| from llama_stack.apis.scoring_functions import ScoringFunctions | ||||
| from llama_stack.apis.shields import Shields | ||||
| from llama_stack.apis.telemetry import Telemetry | ||||
| from llama_stack.apis.tools import ToolGroups, ToolRuntime | ||||
| from llama_stack.apis.vector_io import VectorIO | ||||
| from llama_stack.apis.vector_stores import VectorStore | ||||
|  | @ -49,7 +48,6 @@ from llama_stack.providers.datatypes import ( | |||
|     Api, | ||||
|     BenchmarksProtocolPrivate, | ||||
|     DatasetsProtocolPrivate, | ||||
|     InlineProviderSpec, | ||||
|     ModelsProtocolPrivate, | ||||
|     ProviderSpec, | ||||
|     RemoteProviderConfig, | ||||
|  | @ -98,7 +96,6 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) -> | |||
|         Api.files: Files, | ||||
|         Api.prompts: Prompts, | ||||
|         Api.conversations: Conversations, | ||||
|         Api.telemetry: Telemetry, | ||||
|     } | ||||
| 
 | ||||
|     if external_apis: | ||||
|  | @ -241,24 +238,6 @@ def validate_and_prepare_providers( | |||
|         key = api_str if api not in router_apis else f"inner-{api_str}" | ||||
|         providers_with_specs[key] = specs | ||||
| 
 | ||||
|     # TODO: remove this logic, telemetry should not have providers. | ||||
|     # if telemetry has been enabled in the config initialize our internal impl | ||||
|     # telemetry is not an external API so it SHOULD NOT be auto-routed. | ||||
|     if run_config.telemetry.enabled: | ||||
|         specs = {} | ||||
|         p = InlineProviderSpec( | ||||
|             api=Api.telemetry, | ||||
|             provider_type="inline::meta-reference", | ||||
|             pip_packages=[], | ||||
|             optional_api_dependencies=[Api.datasetio], | ||||
|             module="llama_stack.providers.inline.telemetry.meta_reference", | ||||
|             config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig", | ||||
|             description="Meta's reference implementation of telemetry and observability using OpenTelemetry.", | ||||
|         ) | ||||
|         spec = ProviderWithSpec(spec=p, provider_type="inline::meta-reference", provider_id="meta-reference") | ||||
|         specs["meta-reference"] = spec | ||||
|         providers_with_specs["telemetry"] = specs | ||||
| 
 | ||||
|     return providers_with_specs | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -72,14 +72,6 @@ async def get_auto_router_impl( | |||
|         raise ValueError(f"API {api.value} not found in router map") | ||||
| 
 | ||||
|     api_to_dep_impl = {} | ||||
|     if run_config.telemetry.enabled: | ||||
|         api_to_deps = { | ||||
|             "inference": {"telemetry": Api.telemetry}, | ||||
|         } | ||||
|         for dep_name, dep_api in api_to_deps.get(api.value, {}).items(): | ||||
|             if dep_api in deps: | ||||
|                 api_to_dep_impl[dep_name] = deps[dep_api] | ||||
| 
 | ||||
|     # TODO: move pass configs to routers instead | ||||
|     if api == Api.inference: | ||||
|         inference_ref = run_config.storage.stores.inference | ||||
|  | @ -92,6 +84,7 @@ async def get_auto_router_impl( | |||
|         ) | ||||
|         await inference_store.initialize() | ||||
|         api_to_dep_impl["store"] = inference_store | ||||
|         api_to_dep_impl["telemetry_enabled"] = run_config.telemetry.enabled | ||||
| 
 | ||||
|     elif api == Api.vector_io: | ||||
|         api_to_dep_impl["vector_stores_config"] = run_config.vector_stores | ||||
|  |  | |||
|  | @ -53,13 +53,13 @@ from llama_stack.apis.inference.inference import ( | |||
|     OpenAIChatCompletionContentPartTextParam, | ||||
| ) | ||||
| from llama_stack.apis.models import Model, ModelType | ||||
| from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry | ||||
| from llama_stack.apis.telemetry import MetricEvent, MetricInResponse | ||||
| from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span | ||||
| from llama_stack.log import get_logger | ||||
| from llama_stack.models.llama.llama3.chat_format import ChatFormat | ||||
| from llama_stack.models.llama.llama3.tokenizer import Tokenizer | ||||
| from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable | ||||
| from llama_stack.providers.utils.inference.inference_store import InferenceStore | ||||
| from llama_stack.providers.utils.telemetry.tracing import enqueue_event, get_current_span | ||||
| 
 | ||||
| logger = get_logger(name=__name__, category="core::routers") | ||||
| 
 | ||||
|  | @ -70,14 +70,14 @@ class InferenceRouter(Inference): | |||
|     def __init__( | ||||
|         self, | ||||
|         routing_table: RoutingTable, | ||||
|         telemetry: Telemetry | None = None, | ||||
|         store: InferenceStore | None = None, | ||||
|         telemetry_enabled: bool = False, | ||||
|     ) -> None: | ||||
|         logger.debug("Initializing InferenceRouter") | ||||
|         self.routing_table = routing_table | ||||
|         self.telemetry = telemetry | ||||
|         self.telemetry_enabled = telemetry_enabled | ||||
|         self.store = store | ||||
|         if self.telemetry: | ||||
|         if self.telemetry_enabled: | ||||
|             self.tokenizer = Tokenizer.get_instance() | ||||
|             self.formatter = ChatFormat(self.tokenizer) | ||||
| 
 | ||||
|  | @ -159,7 +159,7 @@ class InferenceRouter(Inference): | |||
|         model: Model, | ||||
|     ) -> list[MetricInResponse]: | ||||
|         metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model) | ||||
|         if self.telemetry: | ||||
|         if self.telemetry_enabled: | ||||
|             for metric in metrics: | ||||
|                 enqueue_event(metric) | ||||
|         return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics] | ||||
|  | @ -223,7 +223,7 @@ class InferenceRouter(Inference): | |||
|             # that we do not return an AsyncIterator, our tests expect a stream of chunks we cannot intercept currently. | ||||
| 
 | ||||
|         response = await provider.openai_completion(params) | ||||
|         if self.telemetry: | ||||
|         if self.telemetry_enabled: | ||||
|             metrics = self._construct_metrics( | ||||
|                 prompt_tokens=response.usage.prompt_tokens, | ||||
|                 completion_tokens=response.usage.completion_tokens, | ||||
|  | @ -285,7 +285,7 @@ class InferenceRouter(Inference): | |||
|         if self.store: | ||||
|             asyncio.create_task(self.store.store_chat_completion(response, params.messages)) | ||||
| 
 | ||||
|         if self.telemetry: | ||||
|         if self.telemetry_enabled: | ||||
|             metrics = self._construct_metrics( | ||||
|                 prompt_tokens=response.usage.prompt_tokens, | ||||
|                 completion_tokens=response.usage.completion_tokens, | ||||
|  | @ -393,7 +393,7 @@ class InferenceRouter(Inference): | |||
|             else: | ||||
|                 if hasattr(chunk, "delta"): | ||||
|                     completion_text += chunk.delta | ||||
|                 if hasattr(chunk, "stop_reason") and chunk.stop_reason and self.telemetry: | ||||
|                 if hasattr(chunk, "stop_reason") and chunk.stop_reason and self.telemetry_enabled: | ||||
|                     complete = True | ||||
|                     completion_tokens = await self._count_tokens(completion_text) | ||||
|             # if we are done receiving tokens | ||||
|  | @ -401,7 +401,7 @@ class InferenceRouter(Inference): | |||
|                 total_tokens = (prompt_tokens or 0) + (completion_tokens or 0) | ||||
| 
 | ||||
|                 # Create a separate span for streaming completion metrics | ||||
|                 if self.telemetry: | ||||
|                 if self.telemetry_enabled: | ||||
|                     # Log metrics in the new span context | ||||
|                     completion_metrics = self._construct_metrics( | ||||
|                         prompt_tokens=prompt_tokens, | ||||
|  | @ -450,7 +450,7 @@ class InferenceRouter(Inference): | |||
|         total_tokens = (prompt_tokens or 0) + (completion_tokens or 0) | ||||
| 
 | ||||
|         # Create a separate span for completion metrics | ||||
|         if self.telemetry: | ||||
|         if self.telemetry_enabled: | ||||
|             # Log metrics in the new span context | ||||
|             completion_metrics = self._construct_metrics( | ||||
|                 prompt_tokens=prompt_tokens, | ||||
|  | @ -548,7 +548,7 @@ class InferenceRouter(Inference): | |||
|                         completion_text += "".join(choice_data["content_parts"]) | ||||
| 
 | ||||
|                     # Add metrics to the chunk | ||||
|                     if self.telemetry and hasattr(chunk, "usage") and chunk.usage: | ||||
|                     if self.telemetry_enabled and hasattr(chunk, "usage") and chunk.usage: | ||||
|                         metrics = self._construct_metrics( | ||||
|                             prompt_tokens=chunk.usage.prompt_tokens, | ||||
|                             completion_tokens=chunk.usage.completion_tokens, | ||||
|  |  | |||
|  | @ -36,7 +36,6 @@ from llama_stack.apis.common.responses import PaginatedResponse | |||
| from llama_stack.core.access_control.access_control import AccessDeniedError | ||||
| from llama_stack.core.datatypes import ( | ||||
|     AuthenticationRequiredError, | ||||
|     LoggingConfig, | ||||
|     StackRunConfig, | ||||
|     process_cors_config, | ||||
| ) | ||||
|  | @ -53,19 +52,13 @@ from llama_stack.core.stack import ( | |||
|     cast_image_name_to_string, | ||||
|     replace_env_vars, | ||||
| ) | ||||
| from llama_stack.core.telemetry import Telemetry | ||||
| from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT, setup_logger | ||||
| from llama_stack.core.utils.config import redact_sensitive_fields | ||||
| from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro | ||||
| from llama_stack.core.utils.context import preserve_contexts_async_generator | ||||
| from llama_stack.log import get_logger, setup_logging | ||||
| from llama_stack.log import LoggingConfig, get_logger, setup_logging | ||||
| from llama_stack.providers.datatypes import Api | ||||
| from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig | ||||
| from llama_stack.providers.inline.telemetry.meta_reference.telemetry import ( | ||||
|     TelemetryAdapter, | ||||
| ) | ||||
| from llama_stack.providers.utils.telemetry.tracing import ( | ||||
|     CURRENT_TRACE_CONTEXT, | ||||
|     setup_logger, | ||||
| ) | ||||
| 
 | ||||
| from .auth import AuthenticationMiddleware | ||||
| from .quota import QuotaMiddleware | ||||
|  | @ -451,9 +444,7 @@ def create_app() -> StackApp: | |||
|             app.add_middleware(CORSMiddleware, **cors_config.model_dump()) | ||||
| 
 | ||||
|     if config.telemetry.enabled: | ||||
|         setup_logger(impls[Api.telemetry]) | ||||
|     else: | ||||
|         setup_logger(TelemetryAdapter(TelemetryConfig(), {})) | ||||
|         setup_logger(Telemetry()) | ||||
| 
 | ||||
|     # Load external APIs if configured | ||||
|     external_apis = load_external_apis(config) | ||||
|  | @ -511,7 +502,8 @@ def create_app() -> StackApp: | |||
|     app.exception_handler(RequestValidationError)(global_exception_handler) | ||||
|     app.exception_handler(Exception)(global_exception_handler) | ||||
| 
 | ||||
|     app.add_middleware(TracingMiddleware, impls=impls, external_apis=external_apis) | ||||
|     if config.telemetry.enabled: | ||||
|         app.add_middleware(TracingMiddleware, impls=impls, external_apis=external_apis) | ||||
| 
 | ||||
|     return app | ||||
| 
 | ||||
|  |  | |||
|  | @ -7,8 +7,8 @@ from aiohttp import hdrs | |||
| 
 | ||||
| from llama_stack.core.external import ExternalApiSpec | ||||
| from llama_stack.core.server.routes import find_matching_route, initialize_route_impls | ||||
| from llama_stack.core.telemetry.tracing import end_trace, start_trace | ||||
| from llama_stack.log import get_logger | ||||
| from llama_stack.providers.utils.telemetry.tracing import end_trace, start_trace | ||||
| 
 | ||||
| logger = get_logger(name=__name__, category="core::server") | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										32
									
								
								llama_stack/core/telemetry/__init__.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								llama_stack/core/telemetry/__init__.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,32 @@ | |||
| # Copyright (c) Meta Platforms, Inc. and affiliates. | ||||
| # All rights reserved. | ||||
| # | ||||
| # This source code is licensed under the terms described in the LICENSE file in | ||||
| # the root directory of this source tree. | ||||
| 
 | ||||
| from .telemetry import Telemetry | ||||
| from .trace_protocol import serialize_value, trace_protocol | ||||
| from .tracing import ( | ||||
|     CURRENT_TRACE_CONTEXT, | ||||
|     ROOT_SPAN_MARKERS, | ||||
|     end_trace, | ||||
|     enqueue_event, | ||||
|     get_current_span, | ||||
|     setup_logger, | ||||
|     span, | ||||
|     start_trace, | ||||
| ) | ||||
| 
 | ||||
| __all__ = [ | ||||
|     "Telemetry", | ||||
|     "trace_protocol", | ||||
|     "serialize_value", | ||||
|     "CURRENT_TRACE_CONTEXT", | ||||
|     "ROOT_SPAN_MARKERS", | ||||
|     "end_trace", | ||||
|     "enqueue_event", | ||||
|     "get_current_span", | ||||
|     "setup_logger", | ||||
|     "span", | ||||
|     "start_trace", | ||||
| ] | ||||
|  | @ -24,14 +24,13 @@ from llama_stack.apis.telemetry import ( | |||
|     SpanStartPayload, | ||||
|     SpanStatus, | ||||
|     StructuredLogEvent, | ||||
|     Telemetry, | ||||
|     UnstructuredLogEvent, | ||||
| ) | ||||
| from llama_stack.core.datatypes import Api | ||||
| from llama_stack.apis.telemetry import ( | ||||
|     Telemetry as TelemetryBase, | ||||
| ) | ||||
| from llama_stack.core.telemetry.tracing import ROOT_SPAN_MARKERS | ||||
| from llama_stack.log import get_logger | ||||
| from llama_stack.providers.utils.telemetry.tracing import ROOT_SPAN_MARKERS | ||||
| 
 | ||||
| from .config import TelemetryConfig | ||||
| 
 | ||||
| _GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = { | ||||
|     "active_spans": {}, | ||||
|  | @ -50,9 +49,8 @@ def is_tracing_enabled(tracer): | |||
|         return span.is_recording() | ||||
| 
 | ||||
| 
 | ||||
| class TelemetryAdapter(Telemetry): | ||||
|     def __init__(self, _config: TelemetryConfig, deps: dict[Api, Any]) -> None: | ||||
|         self.datasetio_api = deps.get(Api.datasetio) | ||||
| class Telemetry(TelemetryBase): | ||||
|     def __init__(self) -> None: | ||||
|         self.meter = None | ||||
| 
 | ||||
|         global _TRACER_PROVIDER | ||||
|  | @ -9,27 +9,29 @@ import inspect | |||
| import json | ||||
| from collections.abc import AsyncGenerator, Callable | ||||
| from functools import wraps | ||||
| from typing import Any | ||||
| from typing import Any, cast | ||||
| 
 | ||||
| from pydantic import BaseModel | ||||
| 
 | ||||
| from llama_stack.models.llama.datatypes import Primitive | ||||
| 
 | ||||
| type JSONValue = Primitive | list["JSONValue"] | dict[str, "JSONValue"] | ||||
| 
 | ||||
| def serialize_value(value: Any) -> Primitive: | ||||
| 
 | ||||
| def serialize_value(value: Any) -> str: | ||||
|     return str(_prepare_for_json(value)) | ||||
| 
 | ||||
| 
 | ||||
| def _prepare_for_json(value: Any) -> str: | ||||
| def _prepare_for_json(value: Any) -> JSONValue: | ||||
|     """Serialize a single value into JSON-compatible format.""" | ||||
|     if value is None: | ||||
|         return "" | ||||
|     elif isinstance(value, str | int | float | bool): | ||||
|         return value | ||||
|     elif hasattr(value, "_name_"): | ||||
|         return value._name_ | ||||
|         return cast(str, value._name_) | ||||
|     elif isinstance(value, BaseModel): | ||||
|         return json.loads(value.model_dump_json()) | ||||
|         return cast(JSONValue, json.loads(value.model_dump_json())) | ||||
|     elif isinstance(value, list | tuple | set): | ||||
|         return [_prepare_for_json(item) for item in value] | ||||
|     elif isinstance(value, dict): | ||||
|  | @ -37,35 +39,35 @@ def _prepare_for_json(value: Any) -> str: | |||
|     else: | ||||
|         try: | ||||
|             json.dumps(value) | ||||
|             return value | ||||
|             return cast(JSONValue, value) | ||||
|         except Exception: | ||||
|             return str(value) | ||||
| 
 | ||||
| 
 | ||||
| def trace_protocol[T](cls: type[T]) -> type[T]: | ||||
| def trace_protocol[T: type[Any]](cls: T) -> T: | ||||
|     """ | ||||
|     A class decorator that automatically traces all methods in a protocol/base class | ||||
|     and its inheriting classes. | ||||
|     """ | ||||
| 
 | ||||
|     def trace_method(method: Callable) -> Callable: | ||||
|     def trace_method(method: Callable[..., Any]) -> Callable[..., Any]: | ||||
|         is_async = asyncio.iscoroutinefunction(method) | ||||
|         is_async_gen = inspect.isasyncgenfunction(method) | ||||
| 
 | ||||
|         def create_span_context(self: Any, *args: Any, **kwargs: Any) -> tuple: | ||||
|         def create_span_context(self: Any, *args: Any, **kwargs: Any) -> tuple[str, str, dict[str, Primitive]]: | ||||
|             class_name = self.__class__.__name__ | ||||
|             method_name = method.__name__ | ||||
|             span_type = "async_generator" if is_async_gen else "async" if is_async else "sync" | ||||
|             sig = inspect.signature(method) | ||||
|             param_names = list(sig.parameters.keys())[1:]  # Skip 'self' | ||||
|             combined_args = {} | ||||
|             combined_args: dict[str, str] = {} | ||||
|             for i, arg in enumerate(args): | ||||
|                 param_name = param_names[i] if i < len(param_names) else f"position_{i + 1}" | ||||
|                 combined_args[param_name] = serialize_value(arg) | ||||
|             for k, v in kwargs.items(): | ||||
|                 combined_args[str(k)] = serialize_value(v) | ||||
| 
 | ||||
|             span_attributes = { | ||||
|             span_attributes: dict[str, Primitive] = { | ||||
|                 "__autotraced__": True, | ||||
|                 "__class__": class_name, | ||||
|                 "__method__": method_name, | ||||
|  | @ -76,8 +78,8 @@ def trace_protocol[T](cls: type[T]) -> type[T]: | |||
|             return class_name, method_name, span_attributes | ||||
| 
 | ||||
|         @wraps(method) | ||||
|         async def async_gen_wrapper(self: Any, *args: Any, **kwargs: Any) -> AsyncGenerator: | ||||
|             from llama_stack.providers.utils.telemetry import tracing | ||||
|         async def async_gen_wrapper(self: Any, *args: Any, **kwargs: Any) -> AsyncGenerator[Any, None]: | ||||
|             from llama_stack.core.telemetry import tracing | ||||
| 
 | ||||
|             class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs) | ||||
| 
 | ||||
|  | @ -92,7 +94,7 @@ def trace_protocol[T](cls: type[T]) -> type[T]: | |||
| 
 | ||||
|         @wraps(method) | ||||
|         async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any: | ||||
|             from llama_stack.providers.utils.telemetry import tracing | ||||
|             from llama_stack.core.telemetry import tracing | ||||
| 
 | ||||
|             class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs) | ||||
| 
 | ||||
|  | @ -107,7 +109,7 @@ def trace_protocol[T](cls: type[T]) -> type[T]: | |||
| 
 | ||||
|         @wraps(method) | ||||
|         def sync_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any: | ||||
|             from llama_stack.providers.utils.telemetry import tracing | ||||
|             from llama_stack.core.telemetry import tracing | ||||
| 
 | ||||
|             class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs) | ||||
| 
 | ||||
|  | @ -127,16 +129,17 @@ def trace_protocol[T](cls: type[T]) -> type[T]: | |||
|         else: | ||||
|             return sync_wrapper | ||||
| 
 | ||||
|     original_init_subclass = getattr(cls, "__init_subclass__", None) | ||||
|     original_init_subclass = cast(Callable[..., Any] | None, getattr(cls, "__init_subclass__", None)) | ||||
| 
 | ||||
|     def __init_subclass__(cls_child, **kwargs):  # noqa: N807 | ||||
|     def __init_subclass__(cls_child: type[Any], **kwargs: Any) -> None:  # noqa: N807 | ||||
|         if original_init_subclass: | ||||
|             original_init_subclass(**kwargs) | ||||
|             cast(Callable[..., None], original_init_subclass)(**kwargs) | ||||
| 
 | ||||
|         for name, method in vars(cls_child).items(): | ||||
|             if inspect.isfunction(method) and not name.startswith("_"): | ||||
|                 setattr(cls_child, name, trace_method(method))  # noqa: B010 | ||||
| 
 | ||||
|     cls.__init_subclass__ = classmethod(__init_subclass__) | ||||
|     cls_any = cast(Any, cls) | ||||
|     cls_any.__init_subclass__ = classmethod(__init_subclass__) | ||||
| 
 | ||||
|     return cls | ||||
|  | @ -15,7 +15,7 @@ import time | |||
| from collections.abc import Callable | ||||
| from datetime import UTC, datetime | ||||
| from functools import wraps | ||||
| from typing import Any | ||||
| from typing import Any, Self | ||||
| 
 | ||||
| from llama_stack.apis.telemetry import ( | ||||
|     Event, | ||||
|  | @ -28,8 +28,8 @@ from llama_stack.apis.telemetry import ( | |||
|     Telemetry, | ||||
|     UnstructuredLogEvent, | ||||
| ) | ||||
| from llama_stack.core.telemetry.trace_protocol import serialize_value | ||||
| from llama_stack.log import get_logger | ||||
| from llama_stack.providers.utils.telemetry.trace_protocol import serialize_value | ||||
| 
 | ||||
| logger = get_logger(__name__, category="core") | ||||
| 
 | ||||
|  | @ -89,9 +89,6 @@ def generate_trace_id() -> str: | |||
|     return trace_id_to_str(trace_id) | ||||
| 
 | ||||
| 
 | ||||
| CURRENT_TRACE_CONTEXT = contextvars.ContextVar("trace_context", default=None) | ||||
| BACKGROUND_LOGGER = None | ||||
| 
 | ||||
| LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS = 60.0 | ||||
| 
 | ||||
| 
 | ||||
|  | @ -104,7 +101,7 @@ class BackgroundLogger: | |||
|         self._last_queue_full_log_time: float = 0.0 | ||||
|         self._dropped_since_last_notice: int = 0 | ||||
| 
 | ||||
|     def log_event(self, event): | ||||
|     def log_event(self, event: Event) -> None: | ||||
|         try: | ||||
|             self.log_queue.put_nowait(event) | ||||
|         except queue.Full: | ||||
|  | @ -137,10 +134,13 @@ class BackgroundLogger: | |||
|             finally: | ||||
|                 self.log_queue.task_done() | ||||
| 
 | ||||
|     def __del__(self): | ||||
|     def __del__(self) -> None: | ||||
|         self.log_queue.join() | ||||
| 
 | ||||
| 
 | ||||
| BACKGROUND_LOGGER: BackgroundLogger | None = None | ||||
| 
 | ||||
| 
 | ||||
| def enqueue_event(event: Event) -> None: | ||||
|     """Enqueue a telemetry event to the background logger if available. | ||||
| 
 | ||||
|  | @ -155,13 +155,12 @@ def enqueue_event(event: Event) -> None: | |||
| 
 | ||||
| 
 | ||||
| class TraceContext: | ||||
|     spans: list[Span] = [] | ||||
| 
 | ||||
|     def __init__(self, logger: BackgroundLogger, trace_id: str): | ||||
|         self.logger = logger | ||||
|         self.trace_id = trace_id | ||||
|         self.spans: list[Span] = [] | ||||
| 
 | ||||
|     def push_span(self, name: str, attributes: dict[str, Any] = None) -> Span: | ||||
|     def push_span(self, name: str, attributes: dict[str, Any] | None = None) -> Span: | ||||
|         current_span = self.get_current_span() | ||||
|         span = Span( | ||||
|             span_id=generate_span_id(), | ||||
|  | @ -188,7 +187,7 @@ class TraceContext: | |||
|         self.spans.append(span) | ||||
|         return span | ||||
| 
 | ||||
|     def pop_span(self, status: SpanStatus = SpanStatus.OK): | ||||
|     def pop_span(self, status: SpanStatus = SpanStatus.OK) -> None: | ||||
|         span = self.spans.pop() | ||||
|         if span is not None: | ||||
|             self.logger.log_event( | ||||
|  | @ -203,10 +202,15 @@ class TraceContext: | |||
|                 ) | ||||
|             ) | ||||
| 
 | ||||
|     def get_current_span(self): | ||||
|     def get_current_span(self) -> Span | None: | ||||
|         return self.spans[-1] if self.spans else None | ||||
| 
 | ||||
| 
 | ||||
| CURRENT_TRACE_CONTEXT: contextvars.ContextVar[TraceContext | None] = contextvars.ContextVar( | ||||
|     "trace_context", default=None | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| def setup_logger(api: Telemetry, level: int = logging.INFO): | ||||
|     global BACKGROUND_LOGGER | ||||
| 
 | ||||
|  | @ -217,12 +221,12 @@ def setup_logger(api: Telemetry, level: int = logging.INFO): | |||
|     root_logger.addHandler(TelemetryHandler()) | ||||
| 
 | ||||
| 
 | ||||
| async def start_trace(name: str, attributes: dict[str, Any] = None) -> TraceContext: | ||||
| async def start_trace(name: str, attributes: dict[str, Any] | None = None) -> TraceContext | None: | ||||
|     global CURRENT_TRACE_CONTEXT, BACKGROUND_LOGGER | ||||
| 
 | ||||
|     if BACKGROUND_LOGGER is None: | ||||
|         logger.debug("No Telemetry implementation set. Skipping trace initialization...") | ||||
|         return | ||||
|         return None | ||||
| 
 | ||||
|     trace_id = generate_trace_id() | ||||
|     context = TraceContext(BACKGROUND_LOGGER, trace_id) | ||||
|  | @ -269,7 +273,7 @@ def severity(levelname: str) -> LogSeverity: | |||
| # TODO: ideally, the actual emitting should be done inside a separate daemon | ||||
| # process completely isolated from the server | ||||
| class TelemetryHandler(logging.Handler): | ||||
|     def emit(self, record: logging.LogRecord): | ||||
|     def emit(self, record: logging.LogRecord) -> None: | ||||
|         # horrendous hack to avoid logging from asyncio and getting into an infinite loop | ||||
|         if record.module in ("asyncio", "selector_events"): | ||||
|             return | ||||
|  | @ -293,17 +297,17 @@ class TelemetryHandler(logging.Handler): | |||
|             ) | ||||
|         ) | ||||
| 
 | ||||
|     def close(self): | ||||
|     def close(self) -> None: | ||||
|         pass | ||||
| 
 | ||||
| 
 | ||||
| class SpanContextManager: | ||||
|     def __init__(self, name: str, attributes: dict[str, Any] = None): | ||||
|     def __init__(self, name: str, attributes: dict[str, Any] | None = None): | ||||
|         self.name = name | ||||
|         self.attributes = attributes | ||||
|         self.span = None | ||||
|         self.span: Span | None = None | ||||
| 
 | ||||
|     def __enter__(self): | ||||
|     def __enter__(self) -> Self: | ||||
|         global CURRENT_TRACE_CONTEXT | ||||
|         context = CURRENT_TRACE_CONTEXT.get() | ||||
|         if not context: | ||||
|  | @ -313,7 +317,7 @@ class SpanContextManager: | |||
|         self.span = context.push_span(self.name, self.attributes) | ||||
|         return self | ||||
| 
 | ||||
|     def __exit__(self, exc_type, exc_value, traceback): | ||||
|     def __exit__(self, exc_type, exc_value, traceback) -> None: | ||||
|         global CURRENT_TRACE_CONTEXT | ||||
|         context = CURRENT_TRACE_CONTEXT.get() | ||||
|         if not context: | ||||
|  | @ -322,13 +326,13 @@ class SpanContextManager: | |||
| 
 | ||||
|         context.pop_span() | ||||
| 
 | ||||
|     def set_attribute(self, key: str, value: Any): | ||||
|     def set_attribute(self, key: str, value: Any) -> None: | ||||
|         if self.span: | ||||
|             if self.span.attributes is None: | ||||
|                 self.span.attributes = {} | ||||
|             self.span.attributes[key] = serialize_value(value) | ||||
| 
 | ||||
|     async def __aenter__(self): | ||||
|     async def __aenter__(self) -> Self: | ||||
|         global CURRENT_TRACE_CONTEXT | ||||
|         context = CURRENT_TRACE_CONTEXT.get() | ||||
|         if not context: | ||||
|  | @ -338,7 +342,7 @@ class SpanContextManager: | |||
|         self.span = context.push_span(self.name, self.attributes) | ||||
|         return self | ||||
| 
 | ||||
|     async def __aexit__(self, exc_type, exc_value, traceback): | ||||
|     async def __aexit__(self, exc_type, exc_value, traceback) -> None: | ||||
|         global CURRENT_TRACE_CONTEXT | ||||
|         context = CURRENT_TRACE_CONTEXT.get() | ||||
|         if not context: | ||||
|  | @ -347,19 +351,19 @@ class SpanContextManager: | |||
| 
 | ||||
|         context.pop_span() | ||||
| 
 | ||||
|     def __call__(self, func: Callable): | ||||
|     def __call__(self, func: Callable[..., Any]) -> Callable[..., Any]: | ||||
|         @wraps(func) | ||||
|         def sync_wrapper(*args, **kwargs): | ||||
|         def sync_wrapper(*args: Any, **kwargs: Any) -> Any: | ||||
|             with self: | ||||
|                 return func(*args, **kwargs) | ||||
| 
 | ||||
|         @wraps(func) | ||||
|         async def async_wrapper(*args, **kwargs): | ||||
|         async def async_wrapper(*args: Any, **kwargs: Any) -> Any: | ||||
|             async with self: | ||||
|                 return await func(*args, **kwargs) | ||||
| 
 | ||||
|         @wraps(func) | ||||
|         def wrapper(*args, **kwargs): | ||||
|         def wrapper(*args: Any, **kwargs: Any) -> Any: | ||||
|             if asyncio.iscoroutinefunction(func): | ||||
|                 return async_wrapper(*args, **kwargs) | ||||
|             else: | ||||
|  | @ -368,7 +372,7 @@ class SpanContextManager: | |||
|         return wrapper | ||||
| 
 | ||||
| 
 | ||||
| def span(name: str, attributes: dict[str, Any] = None): | ||||
| def span(name: str, attributes: dict[str, Any] | None = None) -> SpanContextManager: | ||||
|     return SpanContextManager(name, attributes) | ||||
| 
 | ||||
| 
 | ||||
|  | @ -9,15 +9,23 @@ import os | |||
| import re | ||||
| from logging.config import dictConfig  # allow-direct-logging | ||||
| 
 | ||||
| from pydantic import BaseModel, Field | ||||
| from rich.console import Console | ||||
| from rich.errors import MarkupError | ||||
| from rich.logging import RichHandler | ||||
| 
 | ||||
| from llama_stack.core.datatypes import LoggingConfig | ||||
| 
 | ||||
| # Default log level | ||||
| DEFAULT_LOG_LEVEL = logging.INFO | ||||
| 
 | ||||
| 
 | ||||
| class LoggingConfig(BaseModel): | ||||
|     category_levels: dict[str, str] = Field( | ||||
|         default_factory=dict, | ||||
|         description=""" | ||||
| Dictionary of different logging configurations for different portions (ex: core, server) of llama stack""", | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| # Predefined categories | ||||
| CATEGORIES = [ | ||||
|     "core", | ||||
|  |  | |||
|  | @ -67,6 +67,7 @@ from llama_stack.apis.safety import Safety | |||
| from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime | ||||
| from llama_stack.apis.vector_io import VectorIO | ||||
| from llama_stack.core.datatypes import AccessRule | ||||
| from llama_stack.core.telemetry import tracing | ||||
| from llama_stack.log import get_logger | ||||
| from llama_stack.models.llama.datatypes import ( | ||||
|     BuiltinTool, | ||||
|  | @ -78,7 +79,6 @@ from llama_stack.providers.utils.inference.openai_compat import ( | |||
|     convert_tooldef_to_openai_tool, | ||||
| ) | ||||
| from llama_stack.providers.utils.kvstore import KVStore | ||||
| from llama_stack.providers.utils.telemetry import tracing | ||||
| 
 | ||||
| from .persistence import AgentPersistence | ||||
| from .safety import SafetyException, ShieldRunnerMixin | ||||
|  |  | |||
|  | @ -65,9 +65,9 @@ from llama_stack.apis.inference import ( | |||
|     OpenAIChoice, | ||||
|     OpenAIMessageParam, | ||||
| ) | ||||
| from llama_stack.core.telemetry import tracing | ||||
| from llama_stack.log import get_logger | ||||
| from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str | ||||
| from llama_stack.providers.utils.telemetry import tracing | ||||
| 
 | ||||
| from .types import ChatCompletionContext, ChatCompletionResult | ||||
| from .utils import ( | ||||
|  |  | |||
|  | @ -37,8 +37,8 @@ from llama_stack.apis.inference import ( | |||
| ) | ||||
| from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime | ||||
| from llama_stack.apis.vector_io import VectorIO | ||||
| from llama_stack.core.telemetry import tracing | ||||
| from llama_stack.log import get_logger | ||||
| from llama_stack.providers.utils.telemetry import tracing | ||||
| 
 | ||||
| from .types import ChatCompletionContext, ToolExecutionResult | ||||
| 
 | ||||
|  |  | |||
|  | @ -8,8 +8,8 @@ import asyncio | |||
| 
 | ||||
| from llama_stack.apis.inference import Message | ||||
| from llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel | ||||
| from llama_stack.core.telemetry import tracing | ||||
| from llama_stack.log import get_logger | ||||
| from llama_stack.providers.utils.telemetry import tracing | ||||
| 
 | ||||
| log = get_logger(name=__name__, category="agents::meta_reference") | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,5 +0,0 @@ | |||
| # Copyright (c) Meta Platforms, Inc. and affiliates. | ||||
| # All rights reserved. | ||||
| # | ||||
| # This source code is licensed under the terms described in the LICENSE file in | ||||
| # the root directory of this source tree. | ||||
|  | @ -1,21 +0,0 @@ | |||
| # Copyright (c) Meta Platforms, Inc. and affiliates. | ||||
| # All rights reserved. | ||||
| # | ||||
| # This source code is licensed under the terms described in the LICENSE file in | ||||
| # the root directory of this source tree. | ||||
| 
 | ||||
| from typing import Any | ||||
| 
 | ||||
| from llama_stack.core.datatypes import Api | ||||
| 
 | ||||
| from .config import TelemetryConfig, TelemetrySink | ||||
| 
 | ||||
| __all__ = ["TelemetryConfig", "TelemetrySink"] | ||||
| 
 | ||||
| 
 | ||||
| async def get_provider_impl(config: TelemetryConfig, deps: dict[Api, Any]): | ||||
|     from .telemetry import TelemetryAdapter | ||||
| 
 | ||||
|     impl = TelemetryAdapter(config, deps) | ||||
|     await impl.initialize() | ||||
|     return impl | ||||
|  | @ -1,47 +0,0 @@ | |||
| # Copyright (c) Meta Platforms, Inc. and affiliates. | ||||
| # All rights reserved. | ||||
| # | ||||
| # This source code is licensed under the terms described in the LICENSE file in | ||||
| # the root directory of this source tree. | ||||
| 
 | ||||
| from enum import StrEnum | ||||
| from typing import Any | ||||
| 
 | ||||
| from pydantic import BaseModel, Field, field_validator | ||||
| 
 | ||||
| 
 | ||||
| class TelemetrySink(StrEnum): | ||||
|     OTEL_TRACE = "otel_trace" | ||||
|     OTEL_METRIC = "otel_metric" | ||||
|     CONSOLE = "console" | ||||
| 
 | ||||
| 
 | ||||
| class TelemetryConfig(BaseModel): | ||||
|     otel_exporter_otlp_endpoint: str | None = Field( | ||||
|         default=None, | ||||
|         description="The OpenTelemetry collector endpoint URL (base URL for traces, metrics, and logs). If not set, the SDK will use OTEL_EXPORTER_OTLP_ENDPOINT environment variable.", | ||||
|     ) | ||||
|     service_name: str = Field( | ||||
|         # service name is always the same, use zero-width space to avoid clutter | ||||
|         default="\u200b", | ||||
|         description="The service name to use for telemetry", | ||||
|     ) | ||||
|     sinks: list[TelemetrySink] = Field( | ||||
|         default_factory=list, | ||||
|         description="List of telemetry sinks to enable (possible values: otel_trace, otel_metric, console)", | ||||
|     ) | ||||
| 
 | ||||
|     @field_validator("sinks", mode="before") | ||||
|     @classmethod | ||||
|     def validate_sinks(cls, v): | ||||
|         if isinstance(v, str): | ||||
|             return [TelemetrySink(sink.strip()) for sink in v.split(",")] | ||||
|         return v or [] | ||||
| 
 | ||||
|     @classmethod | ||||
|     def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: | ||||
|         return { | ||||
|             "service_name": "${env.OTEL_SERVICE_NAME:=\u200b}", | ||||
|             "sinks": "${env.TELEMETRY_SINKS:=}", | ||||
|             "otel_exporter_otlp_endpoint": "${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}", | ||||
|         } | ||||
|  | @ -22,11 +22,11 @@ from llama_stack.apis.inference.inference import ( | |||
| ) | ||||
| from llama_stack.apis.models import Model | ||||
| from llama_stack.apis.models.models import ModelType | ||||
| from llama_stack.core.telemetry.tracing import get_current_span | ||||
| from llama_stack.log import get_logger | ||||
| from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig | ||||
| from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin | ||||
| from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params | ||||
| from llama_stack.providers.utils.telemetry.tracing import get_current_span | ||||
| 
 | ||||
| logger = get_logger(name=__name__, category="providers::remote::watsonx") | ||||
| 
 | ||||
|  |  | |||
|  | @ -256,7 +256,7 @@ class LiteLLMOpenAIMixin( | |||
|         params: OpenAIChatCompletionRequestWithExtraBody, | ||||
|     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: | ||||
|         # Add usage tracking for streaming when telemetry is active | ||||
|         from llama_stack.providers.utils.telemetry.tracing import get_current_span | ||||
|         from llama_stack.core.telemetry.tracing import get_current_span | ||||
| 
 | ||||
|         stream_options = params.stream_options | ||||
|         if params.stream and get_current_span() is not None: | ||||
|  |  | |||
|  | @ -1,5 +0,0 @@ | |||
| # Copyright (c) Meta Platforms, Inc. and affiliates. | ||||
| # All rights reserved. | ||||
| # | ||||
| # This source code is licensed under the terms described in the LICENSE file in | ||||
| # the root directory of this source tree. | ||||
|  | @ -23,7 +23,7 @@ from opentelemetry.sdk.trace import ReadableSpan, TracerProvider | |||
| from opentelemetry.sdk.trace.export import SimpleSpanProcessor | ||||
| from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter | ||||
| 
 | ||||
| import llama_stack.providers.inline.telemetry.meta_reference.telemetry as telemetry_module | ||||
| import llama_stack.core.telemetry.telemetry as telemetry_module | ||||
| from llama_stack.testing.api_recorder import patch_httpx_for_test_id | ||||
| from tests.integration.fixtures.common import instantiate_llama_stack_client | ||||
| 
 | ||||
|  |  | |||
|  | @ -196,8 +196,6 @@ class TestProviderRegistry: | |||
|             assert internal_api not in apis, f"Internal API {internal_api} should not be in providable_apis" | ||||
| 
 | ||||
|         for api in apis: | ||||
|             if api == Api.telemetry: | ||||
|                 continue | ||||
|             module_name = f"llama_stack.providers.registry.{api.name.lower()}" | ||||
|             try: | ||||
|                 importlib.import_module(module_name) | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue