Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-06 18:40:57 +00:00)

Merge 76da90c1fc into sapling-pr-archive-ehhuang
This commit is contained in: commit f064b90161
16 changed files with 342 additions and 395 deletions.
@@ -422,6 +422,18 @@ def process_cors_config(cors_config: bool | CORSConfig | None) -> CORSConfig | None:
     raise ValueError(f"Expected bool or CORSConfig, got {type(cors_config).__name__}")


+class RegisteredResources(BaseModel):
+    """Registry of resources available in the distribution."""
+
+    models: list[ModelInput] = Field(default_factory=list)
+    shields: list[ShieldInput] = Field(default_factory=list)
+    vector_dbs: list[VectorDBInput] = Field(default_factory=list)
+    datasets: list[DatasetInput] = Field(default_factory=list)
+    scoring_fns: list[ScoringFnInput] = Field(default_factory=list)
+    benchmarks: list[BenchmarkInput] = Field(default_factory=list)
+    tool_groups: list[ToolGroupInput] = Field(default_factory=list)
+
+
 class ServerConfig(BaseModel):
     port: int = Field(
         default=8321,
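For reference, the new model is a plain pydantic container whose resource lists all default to empty. A minimal, self-contained sketch of its behavior; ModelInput here is a simplified stand-in, not the real llama_stack input type, and the remaining lists are elided:

# Minimal sketch: RegisteredResources as a pydantic container.
# "ModelInput" is a simplified placeholder, not the real llama_stack type.
from pydantic import BaseModel, Field


class ModelInput(BaseModel):
    model_id: str
    metadata: dict = Field(default_factory=dict)


class RegisteredResources(BaseModel):
    """Registry of resources available in the distribution."""

    models: list[ModelInput] = Field(default_factory=list)
    shields: list[dict] = Field(default_factory=list)  # remaining lists elided


# Every list defaults to empty, so a bare instance is valid:
assert RegisteredResources().models == []

# And the nested run-config section parses directly into it:
resources = RegisteredResources.model_validate(
    {"models": [{"model_id": "gpt-4o", "metadata": {}}]}
)
print(resources.models[0].model_id)  # gpt-4o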
@@ -491,14 +503,10 @@ can be instantiated multiple times (with different configs) if necessary.
         description="Catalog of named storage backends and references available to the stack",
     )

-    # registry of "resources" in the distribution
-    models: list[ModelInput] = Field(default_factory=list)
-    shields: list[ShieldInput] = Field(default_factory=list)
-    vector_dbs: list[VectorDBInput] = Field(default_factory=list)
-    datasets: list[DatasetInput] = Field(default_factory=list)
-    scoring_fns: list[ScoringFnInput] = Field(default_factory=list)
-    benchmarks: list[BenchmarkInput] = Field(default_factory=list)
-    tool_groups: list[ToolGroupInput] = Field(default_factory=list)
+    registered_resources: RegisteredResources = Field(
+        default_factory=RegisteredResources,
+        description="Registry of resources available in the distribution",
+    )

     logging: LoggingConfig | None = Field(default=None, description="Configuration for Llama Stack Logging")
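Callers that used to read these lists straight off the run config now reach them through the nested field. A tiny sketch of the access-path change, again with simplified stand-in types rather than the real llama_stack models:

# Sketch of the access-path change (stand-in types, not the real ones).
from pydantic import BaseModel, Field


class RegisteredResources(BaseModel):
    models: list[dict] = Field(default_factory=list)


class StackRunConfig(BaseModel):
    registered_resources: RegisteredResources = Field(
        default_factory=RegisteredResources,
        description="Registry of resources available in the distribution",
    )


cfg = StackRunConfig()
# before this change: cfg.models
# after this change:  cfg.registered_resources.models
print(cfg.registered_resources.models)  # []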
@@ -110,7 +110,7 @@ TEST_RECORDING_CONTEXT = None

 async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
     for rsrc, api, register_method, list_method in RESOURCES:
-        objects = getattr(run_config, rsrc)
+        objects = getattr(run_config.registered_resources, rsrc)
         if api not in impls:
             continue
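The dispatch itself is untouched: each RESOURCES entry still names an attribute that getattr resolves, only now against the nested model. A self-contained sketch of that pattern, with stand-in types and a stand-in RESOURCES table:

# Self-contained sketch of the getattr-based dispatch (stand-in types).
from pydantic import BaseModel, Field


class RegisteredResources(BaseModel):
    models: list[dict] = Field(default_factory=list)
    shields: list[dict] = Field(default_factory=list)


resources = RegisteredResources(models=[{"model_id": "gpt-4o"}])
for rsrc in ("models", "shields"):  # stand-in for the RESOURCES table
    objects = getattr(resources, rsrc)  # was: getattr(run_config, rsrc)
    print(rsrc, objects)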
@@ -247,6 +247,7 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
-models: []
-shields:
-- shield_id: llama-guard
+registered_resources:
+  models: []
+  shields:
+  - shield_id: llama-guard
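The run.yaml files all gain the same one-level nesting. A quick sketch showing where the keys land after the change, loaded with PyYAML (assumed available) purely for illustration:

# Sketch: resource lists now sit under a top-level registered_resources key.
import yaml

doc = yaml.safe_load(
    """
registered_resources:
  models: []
  shields:
  - shield_id: llama-guard
"""
)
print(doc["registered_resources"]["shields"][0]["shield_id"])  # llama-guard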
@@ -109,6 +109,7 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
@@ -105,6 +105,7 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
@@ -122,6 +122,7 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
@@ -112,6 +112,7 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
@@ -111,6 +111,7 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
@@ -100,6 +100,7 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
-models: []
-shields: []
-vector_dbs: []
+registered_resources:
+  models: []
+  shields: []
+  vector_dbs: []
@@ -142,6 +142,7 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
-models:
-- metadata: {}
-  model_id: gpt-4o
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: gpt-4o
@@ -87,6 +87,7 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
-models:
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
@@ -250,6 +250,7 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
-models: []
-shields:
-- shield_id: llama-guard
+registered_resources:
+  models: []
+  shields:
+  - shield_id: llama-guard
@@ -247,6 +247,7 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
-models: []
-shields:
-- shield_id: llama-guard
+registered_resources:
+  models: []
+  shields:
+  - shield_id: llama-guard
@@ -272,6 +272,7 @@ class RunConfigSettings(BaseModel):
             "apis": apis,
             "providers": provider_configs,
             "storage": storage_config,
-            "models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])],
-            "shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])],
-            "vector_dbs": [],
+            "registered_resources": {
+                "models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])],
+                "shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])],
+                "vector_dbs": [],
@@ -279,6 +280,7 @@ class RunConfigSettings(BaseModel):
-            "scoring_fns": [],
-            "benchmarks": [b.model_dump(exclude_none=True) for b in (self.default_benchmarks or [])],
-            "tool_groups": [t.model_dump(exclude_none=True) for t in (self.default_tool_groups or [])],
+                "scoring_fns": [],
+                "benchmarks": [b.model_dump(exclude_none=True) for b in (self.default_benchmarks or [])],
+                "tool_groups": [t.model_dump(exclude_none=True) for t in (self.default_tool_groups or [])],
+            },
             "server": {
                 "port": 8321,
             },
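The net effect on the generated run config is that the seven resource lists move from top-level keys into a single nested mapping. A sketch of the resulting dict shape, with a stand-in default model list:

# Sketch of the generated run-config dict after the change (stand-in defaults).
default_models = [{"model_id": "${env.INFERENCE_MODEL}", "metadata": {}}]

run_config = {
    "registered_resources": {
        "models": default_models,
        "shields": [],
        "vector_dbs": [],
        "datasets": [],
        "scoring_fns": [],
        "benchmarks": [],
        "tool_groups": [],
    },
    "server": {"port": 8321},
}
print(run_config["registered_resources"]["models"][0]["model_id"])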
@@ -115,6 +115,7 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
-models: []
-shields: []
-vector_dbs: []
+registered_resources:
+  models: []
+  shields: []
+  vector_dbs: []
@@ -1,75 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import json
-from datetime import UTC, datetime
-
-from opentelemetry.sdk.trace import ReadableSpan
-from opentelemetry.sdk.trace.export import SpanProcessor
-from opentelemetry.trace.status import StatusCode
-
-from llama_stack.log import get_logger
-
-logger = get_logger(name="console_span_processor", category="telemetry")
-
-
-class ConsoleSpanProcessor(SpanProcessor):
-    def __init__(self, print_attributes: bool = False):
-        self.print_attributes = print_attributes
-
-    def on_start(self, span: ReadableSpan, parent_context=None) -> None:
-        if span.attributes and span.attributes.get("__autotraced__"):
-            return
-
-        timestamp = datetime.fromtimestamp(span.start_time / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3]
-        logger.info(f"[dim]{timestamp}[/dim] [bold magenta][START][/bold magenta] [dim]{span.name}[/dim]")
-
-    def on_end(self, span: ReadableSpan) -> None:
-        timestamp = datetime.fromtimestamp(span.end_time / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3]
-        span_context = f"[dim]{timestamp}[/dim] [bold magenta][END][/bold magenta] [dim]{span.name}[/dim]"
-        if span.status.status_code == StatusCode.ERROR:
-            span_context += " [bold red][ERROR][/bold red]"
-        elif span.status.status_code != StatusCode.UNSET:
-            span_context += f" [{span.status.status_code}]"
-        duration_ms = (span.end_time - span.start_time) / 1e6
-        span_context += f" ({duration_ms:.2f}ms)"
-        logger.info(span_context)
-
-        if self.print_attributes and span.attributes:
-            for key, value in span.attributes.items():
-                if key.startswith("__"):
-                    continue
-                str_value = str(value)
-                if len(str_value) > 1000:
-                    str_value = str_value[:997] + "..."
-                logger.info(f"    [dim]{key}[/dim]: {str_value}")
-
-        for event in span.events:
-            event_time = datetime.fromtimestamp(event.timestamp / 1e9, tz=UTC).strftime("%H:%M:%S.%f")[:-3]
-            severity = event.attributes.get("severity", "info")
-            message = event.attributes.get("message", event.name)
-            if isinstance(message, dict) or isinstance(message, list):
-                message = json.dumps(message, indent=2)
-            severity_color = {
-                "error": "red",
-                "warn": "yellow",
-                "info": "white",
-                "debug": "dim",
-            }.get(severity, "white")
-            logger.info(f"  {event_time} [bold {severity_color}][{severity.upper()}][/bold {severity_color}] {message}")
-            if event.attributes:
-                for key, value in event.attributes.items():
-                    if key.startswith("__") or key in ["message", "severity"]:
-                        continue
-                    logger.info(f"[dim]{key}[/dim]: {value}")
-
-    def shutdown(self) -> None:
-        """Shutdown the processor."""
-        pass
-
-    def force_flush(self, timeout_millis: float | None = None) -> bool:
-        """Force flush any pending spans."""
-        return True