Merge branch 'main' into add-mcp-authentication-param

Omar Abdelwahab 2025-11-05 14:12:32 -08:00 committed by GitHub
commit 411b18a90f
515 changed files with 1213 additions and 370490 deletions

View file

@@ -6,26 +6,22 @@
 from .conversations import (
     Conversation,
-    ConversationCreateRequest,
     ConversationDeletedResource,
     ConversationItem,
     ConversationItemCreateRequest,
     ConversationItemDeletedResource,
     ConversationItemList,
     Conversations,
-    ConversationUpdateRequest,
     Metadata,
 )

 __all__ = [
     "Conversation",
-    "ConversationCreateRequest",
     "ConversationDeletedResource",
     "ConversationItem",
     "ConversationItemCreateRequest",
     "ConversationItemDeletedResource",
     "ConversationItemList",
     "Conversations",
-    "ConversationUpdateRequest",
     "Metadata",
 ]

View file

@@ -102,32 +102,6 @@ register_schema(ConversationItem, name="ConversationItem")
 # ]

-@json_schema_type
-class ConversationCreateRequest(BaseModel):
-    """Request body for creating a conversation."""
-
-    items: list[ConversationItem] | None = Field(
-        default=[],
-        description="Initial items to include in the conversation context. You may add up to 20 items at a time.",
-        max_length=20,
-    )
-    metadata: Metadata | None = Field(
-        default={},
-        description="Set of 16 key-value pairs that can be attached to an object. Useful for storing additional information",
-        max_length=16,
-    )
-
-
-@json_schema_type
-class ConversationUpdateRequest(BaseModel):
-    """Request body for updating a conversation."""
-
-    metadata: Metadata = Field(
-        ...,
-        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters.",
-    )
-
-
 @json_schema_type
 class ConversationDeletedResource(BaseModel):
     """Response for deleted conversation."""

View file

@@ -46,6 +46,10 @@ class StackListDeps(Subcommand):
     def _run_stack_list_deps_command(self, args: argparse.Namespace) -> None:
         # always keep implementation completely silo-ed away from CLI so CLI
         # can be fast to load and reduces dependencies
+        if not args.config and not args.providers:
+            self.parser.print_help()
+            self.parser.exit()
+
         from ._list_deps import run_stack_list_deps_command

         return run_stack_list_deps_command(args)
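
Editor's note: the new guard means invoking the subcommand with neither a config nor --providers prints usage instead of failing later. A self-contained sketch of the same argparse pattern (parser and flag names here are illustrative, not the real CLI definition):

import argparse

parser = argparse.ArgumentParser(prog="llama stack list-deps")
parser.add_argument("config", nargs="?")
parser.add_argument("--providers")
args = parser.parse_args()

# Same guard as above: with no selector given, show help and exit cleanly.
if not args.config and not args.providers:
    parser.print_help()
    parser.exit()  # ArgumentParser.exit() defaults to status 0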

View file

@@ -9,48 +9,69 @@ from pathlib import Path
 from llama_stack.cli.subcommand import Subcommand
 from llama_stack.cli.table import print_table
 from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR


 class StackListBuilds(Subcommand):
-    """List built stacks in .llama/distributions directory"""
+    """List available distributions (both built-in and custom)"""

     def __init__(self, subparsers: argparse._SubParsersAction):
         super().__init__()
         self.parser = subparsers.add_parser(
             "list",
             prog="llama stack list",
-            description="list the build stacks",
+            description="list available distributions",
             formatter_class=argparse.ArgumentDefaultsHelpFormatter,
         )
         self._add_arguments()
         self.parser.set_defaults(func=self._list_stack_command)

-    def _get_distribution_dirs(self) -> dict[str, Path]:
-        """Return a dictionary of distribution names and their paths"""
-        distributions = {}
-        dist_dir = Path.home() / ".llama" / "distributions"
+    def _get_distribution_dirs(self) -> dict[str, tuple[Path, str]]:
+        """Return a dictionary of distribution names and their paths with source type
+
+        Returns:
+            dict mapping distro name to (path, source_type) where source_type is 'built-in' or 'custom'
+        """
+        distributions = {}
+
+        # Get built-in distributions from source code
+        distro_dir = Path(__file__).parent.parent.parent / "distributions"
+        if distro_dir.exists():
+            for stack_dir in distro_dir.iterdir():
+                if stack_dir.is_dir() and not stack_dir.name.startswith(".") and not stack_dir.name.startswith("__"):
+                    distributions[stack_dir.name] = (stack_dir, "built-in")
+
+        # Get custom/run distributions from ~/.llama/distributions
+        # These override built-in ones if they have the same name
+        if DISTRIBS_BASE_DIR.exists():
+            for stack_dir in DISTRIBS_BASE_DIR.iterdir():
+                if stack_dir.is_dir() and not stack_dir.name.startswith("."):
+                    # Clean up the name (remove llamastack- prefix if present)
+                    name = stack_dir.name.replace("llamastack-", "")
+                    distributions[name] = (stack_dir, "custom")

-        if dist_dir.exists():
-            for stack_dir in dist_dir.iterdir():
-                if stack_dir.is_dir():
-                    distributions[stack_dir.name] = stack_dir
         return distributions

     def _list_stack_command(self, args: argparse.Namespace) -> None:
         distributions = self._get_distribution_dirs()

         if not distributions:
-            print("No stacks found in ~/.llama/distributions")
+            print("No distributions found")
             return

-        headers = ["Stack Name", "Path"]
-        headers.extend(["Build Config", "Run Config"])
+        headers = ["Stack Name", "Source", "Path", "Build Config", "Run Config"]
         rows = []
-        for name, path in distributions.items():
-            row = [name, str(path)]
+        for name, (path, source_type) in sorted(distributions.items()):
+            row = [name, source_type, str(path)]
             # Check for build and run config files
-            build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No"
-            run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No"
+            # For built-in distributions, configs are named build.yaml and run.yaml
+            # For custom distributions, configs are named {name}-build.yaml and {name}-run.yaml
+            if source_type == "built-in":
+                build_config = "Yes" if (path / "build.yaml").exists() else "No"
+                run_config = "Yes" if (path / "run.yaml").exists() else "No"
+            else:
+                build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No"
+                run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No"
             row.extend([build_config, run_config])
             rows.append(row)
         print_table(rows, headers, separate_rows=True)
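
Editor's note: because the custom scan runs after the built-in scan and writes into the same dict, a custom distribution reusing a built-in name replaces the built-in entry, which is exactly the override the comment describes. A minimal sketch of that semantics (the distribution name is made up):

from pathlib import Path

distributions: dict[str, tuple[Path, str]] = {}
# Built-in entry is inserted first...
distributions["starter"] = (Path("/src/llama_stack/distributions/starter"), "built-in")
# ...and a same-named custom entry simply overwrites it.
distributions["starter"] = (Path.home() / ".llama/distributions/starter", "custom")
assert distributions["starter"][1] == "custom"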

View file

@@ -427,6 +427,7 @@ _GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
     "counters": {},
     "gauges": {},
     "up_down_counters": {},
+    "histograms": {},
 }
 _global_lock = threading.Lock()
 _TRACER_PROVIDER = None
@@ -540,6 +541,16 @@ class Telemetry:
         )
         return cast(metrics.ObservableGauge, _GLOBAL_STORAGE["gauges"][name])

+    def _get_or_create_histogram(self, name: str, unit: str) -> metrics.Histogram:
+        assert self.meter is not None
+        if name not in _GLOBAL_STORAGE["histograms"]:
+            _GLOBAL_STORAGE["histograms"][name] = self.meter.create_histogram(
+                name=name,
+                unit=unit,
+                description=f"Histogram for {name}",
+            )
+        return cast(metrics.Histogram, _GLOBAL_STORAGE["histograms"][name])
+
     def _log_metric(self, event: MetricEvent) -> None:
         # Add metric as an event to the current span
         try:
@@ -571,7 +582,16 @@ class Telemetry:
         # Log to OpenTelemetry meter if available
         if self.meter is None:
             return
-        if isinstance(event.value, int):
+        # Use histograms for token-related metrics (per-request measurements)
+        # Use counters for other cumulative metrics
+        token_metrics = {"prompt_tokens", "completion_tokens", "total_tokens"}
+        if event.metric in token_metrics:
+            # Token metrics are per-request measurements, use histogram
+            histogram = self._get_or_create_histogram(event.metric, event.unit)
+            histogram.record(event.value, attributes=_clean_attributes(event.attributes))
+        elif isinstance(event.value, int):
             counter = self._get_or_create_counter(event.metric, event.unit)
             counter.add(event.value, attributes=_clean_attributes(event.attributes))
         elif isinstance(event.value, float):
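
Editor's note: the split encodes a metrics-modeling rule — counters answer "how many in total", while histograms keep each per-request measurement so percentiles survive aggregation. A standalone sketch of the distinction using the OpenTelemetry Python metrics API (meter provider setup elided; metric names are illustrative):

from opentelemetry import metrics

meter = metrics.get_meter("example")

# Counter: monotonically accumulating total, e.g. requests served.
requests = meter.create_counter("requests", unit="1", description="Total requests")
requests.add(1)

# Histogram: records each observation, so backends can compute p50/p95
# over per-request values such as prompt_tokens.
prompt_tokens = meter.create_histogram("prompt_tokens", unit="tokens", description="Tokens per request")
prompt_tokens.record(512)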

View file

@@ -283,8 +283,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
             # ...
             provider_resource_id = f"{self.__provider_id__}/{model_spec['model_id']}"
             if "embedding" in functions:
-                embedding_dimension = model_spec["model_limits"]["embedding_dimension"]
-                context_length = model_spec["model_limits"]["max_sequence_length"]
+                embedding_dimension = model_spec.get("model_limits", {}).get("embedding_dimension", 0)
+                context_length = model_spec.get("model_limits", {}).get("max_sequence_length", 0)
                 embedding_metadata = {
                     "embedding_dimension": embedding_dimension,
                     "context_length": context_length,
@@ -306,10 +306,6 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
                     metadata={},
                     model_type=ModelType.llm,
                 )
-            # In theory, I guess it is possible that a model could be both an embedding model and a text chat model.
-            # In that case, the cache will record the generator Model object, and the list which we return will have
-            # both the generator Model object and the text chat Model object. That's fine because the cache is
-            # only used for check_model_availability() anyway.
             self._model_cache[provider_resource_id] = model
             models.append(model)
         return models
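
Editor's note: the change swaps direct indexing for chained dict.get() calls, so a model_spec without "model_limits" yields a default of 0 instead of raising KeyError. A tiny sketch of the difference (the spec dict here is fabricated):

model_spec = {"model_id": "example-model"}  # note: no "model_limits" key

# Old style raises KeyError:
# model_spec["model_limits"]["embedding_dimension"]

# New style degrades to a default:
limits = model_spec.get("model_limits", {})
embedding_dimension = limits.get("embedding_dimension", 0)
assert embedding_dimension == 0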

View file

@@ -886,8 +886,8 @@ class OpenAIVectorStoreMixin(ABC):
         # Determine pagination info
         has_more = len(file_objects) > limit
-        first_id = file_objects[0].id if file_objects else None
-        last_id = file_objects[-1].id if file_objects else None
+        first_id = limited_files[0].id if file_objects else None
+        last_id = limited_files[-1].id if file_objects else None

         return VectorStoreListFilesResponse(
             data=limited_files,
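
Editor's note: the fix points first_id and last_id at the page actually returned (limited_files) rather than the full, unsliced result set, so pagination cursors line up with the data field. A sketch of the bookkeeping, assuming limited_files = file_objects[:limit] as implied by the surrounding method (ids fabricated):

file_objects = [f"file-{i}" for i in range(25)]
limit = 10
limited_files = file_objects[:limit]

has_more = len(file_objects) > limit                   # True: another page exists
first_id = limited_files[0] if file_objects else None  # cursor start of this page
last_id = limited_files[-1] if file_objects else None  # cursor end of this page
assert (first_id, last_id) == ("file-0", "file-9")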