mirror of https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 09:53:45 +00:00

Merge branch 'main' into add-mcp-authentication-param

commit 411b18a90f
515 changed files with 1213 additions and 370490 deletions

@@ -6,26 +6,22 @@
 from .conversations import (
     Conversation,
-    ConversationCreateRequest,
     ConversationDeletedResource,
     ConversationItem,
     ConversationItemCreateRequest,
     ConversationItemDeletedResource,
     ConversationItemList,
     Conversations,
-    ConversationUpdateRequest,
     Metadata,
 )

 __all__ = [
     "Conversation",
-    "ConversationCreateRequest",
     "ConversationDeletedResource",
     "ConversationItem",
     "ConversationItemCreateRequest",
     "ConversationItemDeletedResource",
     "ConversationItemList",
     "Conversations",
-    "ConversationUpdateRequest",
     "Metadata",
 ]

@@ -102,32 +102,6 @@ register_schema(ConversationItem, name="ConversationItem")
 # ]


-@json_schema_type
-class ConversationCreateRequest(BaseModel):
-    """Request body for creating a conversation."""
-
-    items: list[ConversationItem] | None = Field(
-        default=[],
-        description="Initial items to include in the conversation context. You may add up to 20 items at a time.",
-        max_length=20,
-    )
-    metadata: Metadata | None = Field(
-        default={},
-        description="Set of 16 key-value pairs that can be attached to an object. Useful for storing additional information",
-        max_length=16,
-    )
-
-
-@json_schema_type
-class ConversationUpdateRequest(BaseModel):
-    """Request body for updating a conversation."""
-
-    metadata: Metadata = Field(
-        ...,
-        description="Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters.",
-    )
-
-
 @json_schema_type
 class ConversationDeletedResource(BaseModel):
     """Response for deleted conversation."""
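
Note on the removed request models: the size limits they declared are enforced by pydantic itself via Field(max_length=...), which on list and dict fields bounds the element and key counts (maxItems/maxProperties in the generated JSON schema). A minimal runnable sketch, with a hypothetical stand-in model rather than the real ConversationItem/Metadata types:

from pydantic import BaseModel, Field, ValidationError


class CreateRequest(BaseModel):  # hypothetical stand-in for ConversationCreateRequest
    items: list[str] | None = Field(default=[], max_length=20)          # at most 20 items
    metadata: dict[str, str] | None = Field(default={}, max_length=16)  # at most 16 keys


try:
    CreateRequest(items=["x"] * 21)  # 21 items exceeds max_length=20
except ValidationError as e:
    print(e.errors()[0]["type"])  # -> "too_long"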

@@ -46,6 +46,10 @@ class StackListDeps(Subcommand):
     def _run_stack_list_deps_command(self, args: argparse.Namespace) -> None:
         # always keep implementation completely silo-ed away from CLI so CLI
         # can be fast to load and reduces dependencies
+        if not args.config and not args.providers:
+            self.parser.print_help()
+            self.parser.exit()
+
         from ._list_deps import run_stack_list_deps_command

         return run_stack_list_deps_command(args)
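
The comment in this hunk names the design rule: keep the implementation import inside the handler so loading the CLI never pays for the implementation's dependencies. A self-contained sketch of that deferred-import pattern, with made-up names (not the real llama modules):

import argparse


def run(args: argparse.Namespace) -> None:
    # Imported only when the subcommand actually executes, not at CLI load time.
    import json  # stand-in for a heavy implementation module

    print(json.dumps(vars(args), default=str))


parser = argparse.ArgumentParser(prog="llama")
sub = parser.add_subparsers()
deps = sub.add_parser("list-deps")
deps.set_defaults(func=run)

args = parser.parse_args(["list-deps"])
args.func(args)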

@@ -9,48 +9,69 @@ from pathlib import Path

 from llama_stack.cli.subcommand import Subcommand
 from llama_stack.cli.table import print_table
+from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR


 class StackListBuilds(Subcommand):
-    """List built stacks in .llama/distributions directory"""
+    """List available distributions (both built-in and custom)"""

     def __init__(self, subparsers: argparse._SubParsersAction):
         super().__init__()
         self.parser = subparsers.add_parser(
             "list",
             prog="llama stack list",
-            description="list the build stacks",
+            description="list available distributions",
             formatter_class=argparse.ArgumentDefaultsHelpFormatter,
         )
         self._add_arguments()
         self.parser.set_defaults(func=self._list_stack_command)

-    def _get_distribution_dirs(self) -> dict[str, Path]:
-        """Return a dictionary of distribution names and their paths"""
-        distributions = {}
-        dist_dir = Path.home() / ".llama" / "distributions"
+    def _get_distribution_dirs(self) -> dict[str, tuple[Path, str]]:
+        """Return a dictionary of distribution names and their paths with source type
+
+        Returns:
+            dict mapping distro name to (path, source_type) where source_type is 'built-in' or 'custom'
+        """
+        distributions = {}
+
+        # Get built-in distributions from source code
+        distro_dir = Path(__file__).parent.parent.parent / "distributions"
+        if distro_dir.exists():
+            for stack_dir in distro_dir.iterdir():
+                if stack_dir.is_dir() and not stack_dir.name.startswith(".") and not stack_dir.name.startswith("__"):
+                    distributions[stack_dir.name] = (stack_dir, "built-in")
+
+        # Get custom/run distributions from ~/.llama/distributions
+        # These override built-in ones if they have the same name
+        if DISTRIBS_BASE_DIR.exists():
+            for stack_dir in DISTRIBS_BASE_DIR.iterdir():
+                if stack_dir.is_dir() and not stack_dir.name.startswith("."):
+                    # Clean up the name (remove llamastack- prefix if present)
+                    name = stack_dir.name.replace("llamastack-", "")
+                    distributions[name] = (stack_dir, "custom")

-        if dist_dir.exists():
-            for stack_dir in dist_dir.iterdir():
-                if stack_dir.is_dir():
-                    distributions[stack_dir.name] = stack_dir
         return distributions

     def _list_stack_command(self, args: argparse.Namespace) -> None:
         distributions = self._get_distribution_dirs()

         if not distributions:
-            print("No stacks found in ~/.llama/distributions")
+            print("No distributions found")
             return

-        headers = ["Stack Name", "Path"]
-        headers.extend(["Build Config", "Run Config"])
+        headers = ["Stack Name", "Source", "Path", "Build Config", "Run Config"]
         rows = []
-        for name, path in distributions.items():
-            row = [name, str(path)]
+        for name, (path, source_type) in sorted(distributions.items()):
+            row = [name, source_type, str(path)]
             # Check for build and run config files
-            build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No"
-            run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No"
+            # For built-in distributions, configs are named build.yaml and run.yaml
+            # For custom distributions, configs are named {name}-build.yaml and {name}-run.yaml
+            if source_type == "built-in":
+                build_config = "Yes" if (path / "build.yaml").exists() else "No"
+                run_config = "Yes" if (path / "run.yaml").exists() else "No"
+            else:
+                build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No"
+                run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No"
             row.extend([build_config, run_config])
             rows.append(row)
         print_table(rows, headers, separate_rows=True)
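
The override behavior noted in the hunk's comment falls out of plain dict semantics: both scans write into one dict keyed by distro name, and the custom scan runs second, so the later write wins. A minimal sketch of that precedence rule (the paths and the "starter" name are made up for illustration):

from pathlib import Path

distributions: dict[str, tuple[Path, str]] = {}
distributions["starter"] = (Path("/src/llama_stack/distributions/starter"), "built-in")
# The custom scan runs second, so a custom distro with the same name shadows the built-in one.
distributions["starter"] = (Path.home() / ".llama" / "distributions" / "starter", "custom")

print(distributions["starter"][1])  # -> custom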

@@ -427,6 +427,7 @@ _GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
     "counters": {},
     "gauges": {},
     "up_down_counters": {},
+    "histograms": {},
 }
_global_lock = threading.Lock()
_TRACER_PROVIDER = None

@@ -540,6 +541,16 @@ class Telemetry:
         )
         return cast(metrics.ObservableGauge, _GLOBAL_STORAGE["gauges"][name])

+    def _get_or_create_histogram(self, name: str, unit: str) -> metrics.Histogram:
+        assert self.meter is not None
+        if name not in _GLOBAL_STORAGE["histograms"]:
+            _GLOBAL_STORAGE["histograms"][name] = self.meter.create_histogram(
+                name=name,
+                unit=unit,
+                description=f"Histogram for {name}",
+            )
+        return cast(metrics.Histogram, _GLOBAL_STORAGE["histograms"][name])
+
     def _log_metric(self, event: MetricEvent) -> None:
         # Add metric as an event to the current span
         try:

@@ -571,7 +582,16 @@
         # Log to OpenTelemetry meter if available
         if self.meter is None:
             return
-        if isinstance(event.value, int):
+
+        # Use histograms for token-related metrics (per-request measurements)
+        # Use counters for other cumulative metrics
+        token_metrics = {"prompt_tokens", "completion_tokens", "total_tokens"}
+
+        if event.metric in token_metrics:
+            # Token metrics are per-request measurements, use histogram
+            histogram = self._get_or_create_histogram(event.metric, event.unit)
+            histogram.record(event.value, attributes=_clean_attributes(event.attributes))
+        elif isinstance(event.value, int):
             counter = self._get_or_create_counter(event.metric, event.unit)
             counter.add(event.value, attributes=_clean_attributes(event.attributes))
         elif isinstance(event.value, float):
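
The rationale in the hunk's comments, counters for cumulative metrics and histograms for per-request ones, comes down to instrument semantics: a Counter only accumulates a monotonic sum, while a Histogram keeps the distribution of recorded values, so percentiles like p95 prompt_tokens remain recoverable. A sketch using only the public opentelemetry-api surface (meter and instrument names are arbitrary; without an SDK configured these are no-op instruments):

from opentelemetry import metrics

meter = metrics.get_meter("example")

requests_total = meter.create_counter("requests_total", unit="1")
prompt_tokens = meter.create_histogram("prompt_tokens", unit="tokens")

for n in (120, 80, 4000):        # per-request token counts
    requests_total.add(1)        # cumulative: only the running sum survives
    prompt_tokens.record(n)      # distribution: buckets preserve the spread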

@@ -283,8 +283,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
             # ...
             provider_resource_id = f"{self.__provider_id__}/{model_spec['model_id']}"
             if "embedding" in functions:
-                embedding_dimension = model_spec["model_limits"]["embedding_dimension"]
-                context_length = model_spec["model_limits"]["max_sequence_length"]
+                embedding_dimension = model_spec.get("model_limits", {}).get("embedding_dimension", 0)
+                context_length = model_spec.get("model_limits", {}).get("max_sequence_length", 0)
                 embedding_metadata = {
                     "embedding_dimension": embedding_dimension,
                     "context_length": context_length,
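
What the .get() chain buys here: if a model spec arrives without a "model_limits" entry, direct indexing raises KeyError, while the chained .get() calls fall through to a default of 0. A standalone sketch (the spec dict below is a made-up example, not a real watsonx payload):

model_spec = {"model_id": "example-embedding-model"}  # no "model_limits" key

# Old style: model_spec["model_limits"]["embedding_dimension"]  -> KeyError
embedding_dimension = model_spec.get("model_limits", {}).get("embedding_dimension", 0)
context_length = model_spec.get("model_limits", {}).get("max_sequence_length", 0)

print(embedding_dimension, context_length)  # -> 0 0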

@@ -306,10 +306,6 @@
                     metadata={},
                     model_type=ModelType.llm,
                 )
-                # In theory, I guess it is possible that a model could be both an embedding model and a text chat model.
-                # In that case, the cache will record the generator Model object, and the list which we return will have
-                # both the generator Model object and the text chat Model object. That's fine because the cache is
-                # only used for check_model_availability() anyway.
                 self._model_cache[provider_resource_id] = model
                 models.append(model)
         return models

@@ -886,8 +886,8 @@ class OpenAIVectorStoreMixin(ABC):

         # Determine pagination info
         has_more = len(file_objects) > limit
-        first_id = file_objects[0].id if file_objects else None
-        last_id = file_objects[-1].id if file_objects else None
+        first_id = limited_files[0].id if file_objects else None
+        last_id = limited_files[-1].id if file_objects else None

         return VectorStoreListFilesResponse(
             data=limited_files,
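
The bug this hunk fixes: when extra records are fetched to compute has_more, the page returned to the client is the truncated slice, so the first/last cursors must come from that slice too, or cursor-based paging skips records. A sketch of the failure mode (FileObj and the ids are made up; it assumes limited_files = file_objects[:limit], consistent with the data=limited_files line above):

from dataclasses import dataclass


@dataclass
class FileObj:
    id: str


limit = 2
file_objects = [FileObj("a"), FileObj("b"), FileObj("c")]  # one extra fetched

limited_files = file_objects[:limit]
has_more = len(file_objects) > limit

# Before the fix, last_id came from file_objects and pointed at "c", an item the
# client never received, so the next page (starting after "c") skipped it.
first_id = limited_files[0].id if file_objects else None
last_id = limited_files[-1].id if file_objects else None
print(first_id, last_id, has_more)  # -> a b True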