From e13c92f269cc1cc404f39a20334217ba9e7e19d7 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 11 Mar 2025 09:58:25 -0700 Subject: [PATCH 01/11] revert: feat(server): Use system packages for execution (#1551) Reverts meta-llama/llama-stack#1252 The above PR breaks the following invocation: ```bash llama stack run ~/.llama/distributions/together/together-run.yaml ``` --- llama_stack/cli/stack/run.py | 52 +++++++---------------- llama_stack/distribution/server/server.py | 28 +++--------- 2 files changed, 21 insertions(+), 59 deletions(-) diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py index 1e4f3c5d9..e5686fb10 100644 --- a/llama_stack/cli/stack/run.py +++ b/llama_stack/cli/stack/run.py @@ -56,6 +56,7 @@ class StackRun(Subcommand): "--env", action="append", help="Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times.", + default=[], metavar="KEY=VALUE", ) self.parser.add_argument( @@ -73,6 +74,7 @@ class StackRun(Subcommand): type=str, help="Image Type used during the build. This can be either conda or container or venv.", choices=["conda", "container", "venv"], + default="conda", ) def _run_stack_run_cmd(self, args: argparse.Namespace) -> None: @@ -118,42 +120,20 @@ class StackRun(Subcommand): except AttributeError as e: self.parser.error(f"failed to parse config file '{config_file}':\n {e}") - # If neither image type nor image name is provided, assume the server should be run directly - # using the current environment packages. - if not args.image_type and not args.image_name: - logger.info("No image type or image name provided. Assuming environment packages.") - from llama_stack.distribution.server.server import main as server_main + run_args = formulate_run_args(args.image_type, args.image_name, config, template_name) - # Build the server args from the current args passed to the CLI - server_args = argparse.Namespace() - for arg in vars(args): - # If this is a function, avoid passing it - # "args" contains: - # func=> - if callable(getattr(args, arg)): - continue - setattr(server_args, arg, getattr(args, arg)) + run_args.extend([str(config_file), str(args.port)]) + if args.disable_ipv6: + run_args.append("--disable-ipv6") - # Run the server - server_main(server_args) - else: - run_args = formulate_run_args(args.image_type, args.image_name, config, template_name) + for env_var in args.env: + if "=" not in env_var: + self.parser.error(f"Environment variable '{env_var}' must be in KEY=VALUE format") + key, value = env_var.split("=", 1) # split on first = only + if not key: + self.parser.error(f"Environment variable '{env_var}' has empty key") + run_args.extend(["--env", f"{key}={value}"]) - run_args.extend([str(config_file), str(args.port)]) - if args.disable_ipv6: - run_args.append("--disable-ipv6") - - if args.env: - for env_var in args.env: - if "=" not in env_var: - self.parser.error(f"Environment variable '{env_var}' must be in KEY=VALUE format") - return - key, value = env_var.split("=", 1) # split on first = only - if not key: - self.parser.error(f"Environment variable '{env_var}' has empty key") - return - run_args.extend(["--env", f"{key}={value}"]) - - if args.tls_keyfile and args.tls_certfile: - run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile]) - run_with_pty(run_args) + if args.tls_keyfile and args.tls_certfile: + run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile]) + run_with_pty(run_args) diff --git 
a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index 6b99d908d..f819d446f 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -17,7 +17,7 @@ import warnings from contextlib import asynccontextmanager from importlib.metadata import version as parse_version from pathlib import Path -from typing import Any, List, Optional, Union +from typing import Any, List, Union import yaml from fastapi import Body, FastAPI, HTTPException, Request @@ -314,17 +314,11 @@ class ClientVersionMiddleware: return await self.app(scope, receive, send) -def main(args: Optional[argparse.Namespace] = None): +def main(): """Start the LlamaStack server.""" parser = argparse.ArgumentParser(description="Start the LlamaStack server.") parser.add_argument( "--yaml-config", - dest="config", - help="(Deprecated) Path to YAML configuration file - use --config instead", - ) - parser.add_argument( - "--config", - dest="config", help="Path to YAML configuration file", ) parser.add_argument( @@ -354,19 +348,7 @@ def main(args: Optional[argparse.Namespace] = None): required="--tls-keyfile" in sys.argv, ) - # Determine whether the server args are being passed by the "run" command, if this is the case - # the args will be passed as a Namespace object to the main function, otherwise they will be - # parsed from the command line - if args is None: - args = parser.parse_args() - - # Check for deprecated argument usage - if "--yaml-config" in sys.argv: - warnings.warn( - "The '--yaml-config' argument is deprecated and will be removed in a future version. Use '--config' instead.", - DeprecationWarning, - stacklevel=2, - ) + args = parser.parse_args() if args.env: for env_pair in args.env: @@ -378,9 +360,9 @@ def main(args: Optional[argparse.Namespace] = None): logger.error(f"Error: {str(e)}") sys.exit(1) - if args.config: + if args.yaml_config: # if the user provided a config file, use it, even if template was specified - config_file = Path(args.config) + config_file = Path(args.yaml_config) if not config_file.exists(): raise ValueError(f"Config file {config_file} does not exist") logger.info(f"Using config file: {config_file}") From 0e73186a114a253a24a7638c1b6b9ad6e54b6e59 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Tue, 11 Mar 2025 13:01:09 -0400 Subject: [PATCH 02/11] fix: Add missing shutdown handler for TorchtunePostTrainingImpl (#1535) # What does this PR do? Added missing shutdown handler. (Currently empty.) Without it, when server shuts down, it posts the following warning: ``` __main__:129 server: No shutdown method for TorchtunePostTrainingImpl ``` Signed-off-by: Ihar Hrachyshka [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan (The test plan assumes shutdown logic is fixed, see #1495) Without the patch: ``` INFO: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit) INFO: Shutting down INFO: Waiting for application shutdown. 
INFO 2025-03-10 20:56:43,961 __main__:140 server: Shutting down INFO 2025-03-10 20:56:43,962 __main__:124 server: Shutting down DatasetsRoutingTable INFO 2025-03-10 20:56:43,964 __main__:124 server: Shutting down DatasetIORouter INFO 2025-03-10 20:56:43,965 __main__:124 server: Shutting down ScoringFunctionsRoutingTable INFO 2025-03-10 20:56:43,966 __main__:124 server: Shutting down ScoringRouter INFO 2025-03-10 20:56:43,967 __main__:124 server: Shutting down ModelsRoutingTable INFO 2025-03-10 20:56:43,968 __main__:124 server: Shutting down InferenceRouter INFO 2025-03-10 20:56:43,969 __main__:124 server: Shutting down ShieldsRoutingTable INFO 2025-03-10 20:56:43,971 __main__:124 server: Shutting down SafetyRouter INFO 2025-03-10 20:56:43,972 __main__:124 server: Shutting down VectorDBsRoutingTable INFO 2025-03-10 20:56:43,973 __main__:124 server: Shutting down VectorIORouter INFO 2025-03-10 20:56:43,974 __main__:124 server: Shutting down ToolGroupsRoutingTable INFO 2025-03-10 20:56:43,975 __main__:124 server: Shutting down ToolRuntimeRouter INFO 2025-03-10 20:56:43,976 __main__:124 server: Shutting down MetaReferenceAgentsImpl INFO 2025-03-10 20:56:43,977 __main__:124 server: Shutting down TelemetryAdapter INFO 2025-03-10 20:56:43,978 __main__:124 server: Shutting down TorchtunePostTrainingImpl WARNING 2025-03-10 20:56:43,979 __main__:129 server: No shutdown method for TorchtunePostTrainingImpl INFO 2025-03-10 20:56:43,979 __main__:124 server: Shutting down BenchmarksRoutingTable INFO 2025-03-10 20:56:43,980 __main__:124 server: Shutting down EvalRouter INFO 2025-03-10 20:56:43,981 __main__:124 server: Shutting down DistributionInspectImpl INFO: Application shutdown complete. INFO: Finished server process [33862] ``` Run with the patch and observe no warning: ``` $ kill -INT $(ps ax | grep llama_stack.distribution.server.server | grep -v nvim | awk -e '{print $1}' | sort | head -n 1) ``` ``` INFO: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit) INFO: Shutting down INFO: Waiting for application shutdown. 
INFO 2025-03-11 00:32:56,863 __main__:140 server: Shutting down INFO 2025-03-11 00:32:56,864 __main__:124 server: Shutting down DatasetsRoutingTable INFO 2025-03-11 00:32:56,866 __main__:124 server: Shutting down DatasetIORouter INFO 2025-03-11 00:32:56,867 __main__:124 server: Shutting down ScoringFunctionsRoutingTable INFO 2025-03-11 00:32:56,868 __main__:124 server: Shutting down ScoringRouter INFO 2025-03-11 00:32:56,869 __main__:124 server: Shutting down ModelsRoutingTable INFO 2025-03-11 00:32:56,870 __main__:124 server: Shutting down InferenceRouter INFO 2025-03-11 00:32:56,871 __main__:124 server: Shutting down ShieldsRoutingTable INFO 2025-03-11 00:32:56,872 __main__:124 server: Shutting down SafetyRouter INFO 2025-03-11 00:32:56,873 __main__:124 server: Shutting down VectorDBsRoutingTable INFO 2025-03-11 00:32:56,874 __main__:124 server: Shutting down VectorIORouter INFO 2025-03-11 00:32:56,875 __main__:124 server: Shutting down ToolGroupsRoutingTable INFO 2025-03-11 00:32:56,876 __main__:124 server: Shutting down ToolRuntimeRouter INFO 2025-03-11 00:32:56,877 __main__:124 server: Shutting down MetaReferenceAgentsImpl INFO 2025-03-11 00:32:56,878 __main__:124 server: Shutting down TelemetryAdapter INFO 2025-03-11 00:32:56,879 __main__:124 server: Shutting down TorchtunePostTrainingImpl INFO 2025-03-11 00:32:56,880 __main__:124 server: Shutting down BenchmarksRoutingTable INFO 2025-03-11 00:32:56,881 __main__:124 server: Shutting down EvalRouter INFO 2025-03-11 00:32:56,882 __main__:124 server: Shutting down DistributionInspectImpl ``` [//]: # (## Documentation) Signed-off-by: Ihar Hrachyshka --- .../providers/inline/post_training/torchtune/post_training.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llama_stack/providers/inline/post_training/torchtune/post_training.py b/llama_stack/providers/inline/post_training/torchtune/post_training.py index b837362d7..3a1affc91 100644 --- a/llama_stack/providers/inline/post_training/torchtune/post_training.py +++ b/llama_stack/providers/inline/post_training/torchtune/post_training.py @@ -43,6 +43,9 @@ class TorchtunePostTrainingImpl: self.jobs = {} self.checkpoints_dict = {} + async def shutdown(self): + pass + async def supervised_fine_tune( self, job_uuid: str, From 04106b94aab2bf550a39047370bf75e724b4114f Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Tue, 11 Mar 2025 13:01:46 -0400 Subject: [PATCH 03/11] docs: Remove duplicate docs on api docs generator (#1534) # What does this PR do? Since #892, we also need to install ruamel. Instead of maintaining the list of script dependencies in multiple places, remove it and assume developers read CONTRIBUTING.md docs. [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan Just docs. [//]: # (## Documentation) Signed-off-by: Ihar Hrachyshka --- CONTRIBUTING.md | 3 +-- docs/openapi_generator/README.md | 8 -------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e639328f0..7c0b5d94e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -159,8 +159,7 @@ uv run sphinx-autobuild source build/html --write-all If you modify or add new API endpoints, update the API documentation accordingly. You can do this by running the following command: ```bash -uv sync --extra dev -uv run ./docs/openapi_generator/run_openapi_generator.sh +uv run --with ".[dev]" ./docs/openapi_generator/run_openapi_generator.sh ``` The generated API documentation will be available in `docs/_static/`. 
Make sure to review the changes before committing. diff --git a/docs/openapi_generator/README.md b/docs/openapi_generator/README.md index 298df3ce0..7888e7828 100644 --- a/docs/openapi_generator/README.md +++ b/docs/openapi_generator/README.md @@ -1,9 +1 @@ The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack/distribution/server/endpoints.py` using the `generate.py` utility. - -Please install the following packages before running the script: - -``` -pip install fire PyYAML -``` - -Then simply run `sh run_openapi_generator.sh` From c3d7d17bc4c4d815537a8ca7a5530139dd93c664 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Tue, 11 Mar 2025 13:07:28 -0400 Subject: [PATCH 04/11] chore: fix typing hints for get_provider_impl deps arguments (#1544) # What does this PR do? It's a dict that may contain different types, as per resolver:instantiate_provider implementation. (AFAIU it also never contains ProviderSpecs, but *instances* of provider implementations.) [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan mypy passing if enabled checks for these modules. (See #1543) [//]: # (## Documentation) Signed-off-by: Ihar Hrachyshka --- .../providers/inline/agents/meta_reference/__init__.py | 6 +++--- llama_stack/providers/inline/datasetio/localfs/__init__.py | 4 +++- .../providers/inline/eval/meta_reference/__init__.py | 6 +++--- .../providers/inline/inference/meta_reference/__init__.py | 4 ++-- .../inline/inference/sentence_transformers/__init__.py | 4 +++- llama_stack/providers/inline/inference/vllm/__init__.py | 4 ++-- .../providers/inline/post_training/torchtune/__init__.py | 6 +++--- .../providers/inline/safety/code_scanner/__init__.py | 4 +++- llama_stack/providers/inline/safety/llama_guard/__init__.py | 4 +++- .../providers/inline/safety/prompt_guard/__init__.py | 4 +++- llama_stack/providers/inline/scoring/basic/__init__.py | 6 +++--- llama_stack/providers/inline/scoring/braintrust/__init__.py | 6 +++--- .../providers/inline/scoring/llm_as_judge/__init__.py | 6 +++--- .../inline/tool_runtime/code_interpreter/__init__.py | 4 +++- llama_stack/providers/inline/vector_io/chroma/__init__.py | 6 +++--- llama_stack/providers/inline/vector_io/faiss/__init__.py | 6 +++--- llama_stack/providers/inline/vector_io/milvus/__init__.py | 6 +++--- .../providers/inline/vector_io/sqlite_vec/__init__.py | 6 +++--- 18 files changed, 52 insertions(+), 40 deletions(-) diff --git a/llama_stack/providers/inline/agents/meta_reference/__init__.py b/llama_stack/providers/inline/agents/meta_reference/__init__.py index 8f8c24170..4be064f1d 100644 --- a/llama_stack/providers/inline/agents/meta_reference/__init__.py +++ b/llama_stack/providers/inline/agents/meta_reference/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Dict +from typing import Any, Dict -from llama_stack.distribution.datatypes import Api, ProviderSpec +from llama_stack.distribution.datatypes import Api from .config import MetaReferenceAgentsImplConfig -async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: Dict[Api, ProviderSpec]): +async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: Dict[Api, Any]): from .agents import MetaReferenceAgentsImpl impl = MetaReferenceAgentsImpl( diff --git a/llama_stack/providers/inline/datasetio/localfs/__init__.py b/llama_stack/providers/inline/datasetio/localfs/__init__.py index db8aa555c..5a0876d79 100644 --- a/llama_stack/providers/inline/datasetio/localfs/__init__.py +++ b/llama_stack/providers/inline/datasetio/localfs/__init__.py @@ -4,12 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from typing import Any, Dict + from .config import LocalFSDatasetIOConfig async def get_provider_impl( config: LocalFSDatasetIOConfig, - _deps, + _deps: Dict[str, Any], ): from .datasetio import LocalFSDatasetIOImpl diff --git a/llama_stack/providers/inline/eval/meta_reference/__init__.py b/llama_stack/providers/inline/eval/meta_reference/__init__.py index 56c115322..e2a7fc2cd 100644 --- a/llama_stack/providers/inline/eval/meta_reference/__init__.py +++ b/llama_stack/providers/inline/eval/meta_reference/__init__.py @@ -3,16 +3,16 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict +from typing import Any, Dict -from llama_stack.distribution.datatypes import Api, ProviderSpec +from llama_stack.distribution.datatypes import Api from .config import MetaReferenceEvalConfig async def get_provider_impl( config: MetaReferenceEvalConfig, - deps: Dict[Api, ProviderSpec], + deps: Dict[Api, Any], ): from .eval import MetaReferenceEvalImpl diff --git a/llama_stack/providers/inline/inference/meta_reference/__init__.py b/llama_stack/providers/inline/inference/meta_reference/__init__.py index 9c923490d..3ef7cfd45 100644 --- a/llama_stack/providers/inline/inference/meta_reference/__init__.py +++ b/llama_stack/providers/inline/inference/meta_reference/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Union +from typing import Any, Dict, Union from .config import MetaReferenceInferenceConfig, MetaReferenceQuantizedInferenceConfig async def get_provider_impl( config: Union[MetaReferenceInferenceConfig, MetaReferenceQuantizedInferenceConfig], - _deps, + _deps: Dict[str, Any], ): from .inference import MetaReferenceInferenceImpl diff --git a/llama_stack/providers/inline/inference/sentence_transformers/__init__.py b/llama_stack/providers/inline/inference/sentence_transformers/__init__.py index d5710f7fd..c1d65d10c 100644 --- a/llama_stack/providers/inline/inference/sentence_transformers/__init__.py +++ b/llama_stack/providers/inline/inference/sentence_transformers/__init__.py @@ -4,6 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+from typing import Any, Dict + from llama_stack.providers.inline.inference.sentence_transformers.config import ( SentenceTransformersInferenceConfig, ) @@ -11,7 +13,7 @@ from llama_stack.providers.inline.inference.sentence_transformers.config import async def get_provider_impl( config: SentenceTransformersInferenceConfig, - _deps, + _deps: Dict[str, Any], ): from .sentence_transformers import SentenceTransformersInferenceImpl diff --git a/llama_stack/providers/inline/inference/vllm/__init__.py b/llama_stack/providers/inline/inference/vllm/__init__.py index aa0c4b101..bd0551e57 100644 --- a/llama_stack/providers/inline/inference/vllm/__init__.py +++ b/llama_stack/providers/inline/inference/vllm/__init__.py @@ -4,12 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any +from typing import Any, Dict from .config import VLLMConfig -async def get_provider_impl(config: VLLMConfig, _deps) -> Any: +async def get_provider_impl(config: VLLMConfig, _deps: Dict[str, Any]): from .vllm import VLLMInferenceImpl impl = VLLMInferenceImpl(config) diff --git a/llama_stack/providers/inline/post_training/torchtune/__init__.py b/llama_stack/providers/inline/post_training/torchtune/__init__.py index 7ef8eee01..ca7801be7 100644 --- a/llama_stack/providers/inline/post_training/torchtune/__init__.py +++ b/llama_stack/providers/inline/post_training/torchtune/__init__.py @@ -4,9 +4,9 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict +from typing import Any, Dict -from llama_stack.distribution.datatypes import Api, ProviderSpec +from llama_stack.distribution.datatypes import Api from .config import TorchtunePostTrainingConfig @@ -15,7 +15,7 @@ from .config import TorchtunePostTrainingConfig async def get_provider_impl( config: TorchtunePostTrainingConfig, - deps: Dict[Api, ProviderSpec], + deps: Dict[Api, Any], ): from .post_training import TorchtunePostTrainingImpl diff --git a/llama_stack/providers/inline/safety/code_scanner/__init__.py b/llama_stack/providers/inline/safety/code_scanner/__init__.py index 031130cb7..62975a963 100644 --- a/llama_stack/providers/inline/safety/code_scanner/__init__.py +++ b/llama_stack/providers/inline/safety/code_scanner/__init__.py @@ -4,10 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from typing import Any, Dict + from .config import CodeScannerConfig -async def get_provider_impl(config: CodeScannerConfig, deps): +async def get_provider_impl(config: CodeScannerConfig, deps: Dict[str, Any]): from .code_scanner import MetaReferenceCodeScannerSafetyImpl impl = MetaReferenceCodeScannerSafetyImpl(config, deps) diff --git a/llama_stack/providers/inline/safety/llama_guard/__init__.py b/llama_stack/providers/inline/safety/llama_guard/__init__.py index ee9ee31e6..a4263b169 100644 --- a/llama_stack/providers/inline/safety/llama_guard/__init__.py +++ b/llama_stack/providers/inline/safety/llama_guard/__init__.py @@ -4,10 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+from typing import Any, Dict + from .config import LlamaGuardConfig -async def get_provider_impl(config: LlamaGuardConfig, deps): +async def get_provider_impl(config: LlamaGuardConfig, deps: Dict[str, Any]): from .llama_guard import LlamaGuardSafetyImpl assert isinstance(config, LlamaGuardConfig), f"Unexpected config type: {type(config)}" diff --git a/llama_stack/providers/inline/safety/prompt_guard/__init__.py b/llama_stack/providers/inline/safety/prompt_guard/__init__.py index 087aca6d9..747f34421 100644 --- a/llama_stack/providers/inline/safety/prompt_guard/__init__.py +++ b/llama_stack/providers/inline/safety/prompt_guard/__init__.py @@ -4,10 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from typing import Any, Dict + from .config import PromptGuardConfig # noqa: F401 -async def get_provider_impl(config: PromptGuardConfig, deps): +async def get_provider_impl(config: PromptGuardConfig, deps: Dict[str, Any]): from .prompt_guard import PromptGuardSafetyImpl impl = PromptGuardSafetyImpl(config, deps) diff --git a/llama_stack/providers/inline/scoring/basic/__init__.py b/llama_stack/providers/inline/scoring/basic/__init__.py index c72434e9e..4898b973a 100644 --- a/llama_stack/providers/inline/scoring/basic/__init__.py +++ b/llama_stack/providers/inline/scoring/basic/__init__.py @@ -3,16 +3,16 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict +from typing import Any, Dict -from llama_stack.distribution.datatypes import Api, ProviderSpec +from llama_stack.distribution.datatypes import Api from .config import BasicScoringConfig async def get_provider_impl( config: BasicScoringConfig, - deps: Dict[Api, ProviderSpec], + deps: Dict[Api, Any], ): from .scoring import BasicScoringImpl diff --git a/llama_stack/providers/inline/scoring/braintrust/__init__.py b/llama_stack/providers/inline/scoring/braintrust/__init__.py index 2ddc58bd2..f1b0112d9 100644 --- a/llama_stack/providers/inline/scoring/braintrust/__init__.py +++ b/llama_stack/providers/inline/scoring/braintrust/__init__.py @@ -3,11 +3,11 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict +from typing import Any, Dict from pydantic import BaseModel -from llama_stack.distribution.datatypes import Api, ProviderSpec +from llama_stack.distribution.datatypes import Api from .config import BraintrustScoringConfig @@ -18,7 +18,7 @@ class BraintrustProviderDataValidator(BaseModel): async def get_provider_impl( config: BraintrustScoringConfig, - deps: Dict[Api, ProviderSpec], + deps: Dict[Api, Any], ): from .braintrust import BraintrustScoringImpl diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py b/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py index 18535332e..4a83bfe13 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py @@ -3,16 +3,16 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Dict +from typing import Any, Dict -from llama_stack.distribution.datatypes import Api, ProviderSpec +from llama_stack.distribution.datatypes import Api from .config import LlmAsJudgeScoringConfig async def get_provider_impl( config: LlmAsJudgeScoringConfig, - deps: Dict[Api, ProviderSpec], + deps: Dict[Api, Any], ): from .scoring import LlmAsJudgeScoringImpl diff --git a/llama_stack/providers/inline/tool_runtime/code_interpreter/__init__.py b/llama_stack/providers/inline/tool_runtime/code_interpreter/__init__.py index 995358d46..8317ce793 100644 --- a/llama_stack/providers/inline/tool_runtime/code_interpreter/__init__.py +++ b/llama_stack/providers/inline/tool_runtime/code_interpreter/__init__.py @@ -4,12 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from typing import Any, Dict + from .config import CodeInterpreterToolConfig __all__ = ["CodeInterpreterToolConfig", "CodeInterpreterToolRuntimeImpl"] -async def get_provider_impl(config: CodeInterpreterToolConfig, _deps): +async def get_provider_impl(config: CodeInterpreterToolConfig, _deps: Dict[str, Any]): from .code_interpreter import CodeInterpreterToolRuntimeImpl impl = CodeInterpreterToolRuntimeImpl(config) diff --git a/llama_stack/providers/inline/vector_io/chroma/__init__.py b/llama_stack/providers/inline/vector_io/chroma/__init__.py index abaf01097..f39188b46 100644 --- a/llama_stack/providers/inline/vector_io/chroma/__init__.py +++ b/llama_stack/providers/inline/vector_io/chroma/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict +from typing import Any, Dict -from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack.providers.datatypes import Api from .config import ChromaVectorIOConfig -async def get_provider_impl(config: ChromaVectorIOConfig, deps: Dict[Api, ProviderSpec]): +async def get_provider_impl(config: ChromaVectorIOConfig, deps: Dict[Api, Any]): from llama_stack.providers.remote.vector_io.chroma.chroma import ( ChromaVectorIOAdapter, ) diff --git a/llama_stack/providers/inline/vector_io/faiss/__init__.py b/llama_stack/providers/inline/vector_io/faiss/__init__.py index f23e1fa4f..fc8ce70b4 100644 --- a/llama_stack/providers/inline/vector_io/faiss/__init__.py +++ b/llama_stack/providers/inline/vector_io/faiss/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Dict +from typing import Any, Dict -from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack.providers.datatypes import Api from .config import FaissVectorIOConfig -async def get_provider_impl(config: FaissVectorIOConfig, deps: Dict[Api, ProviderSpec]): +async def get_provider_impl(config: FaissVectorIOConfig, deps: Dict[Api, Any]): from .faiss import FaissVectorIOAdapter assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}" diff --git a/llama_stack/providers/inline/vector_io/milvus/__init__.py b/llama_stack/providers/inline/vector_io/milvus/__init__.py index bee6b2ded..d88a3b005 100644 --- a/llama_stack/providers/inline/vector_io/milvus/__init__.py +++ b/llama_stack/providers/inline/vector_io/milvus/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict +from typing import Any, Dict -from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack.providers.datatypes import Api from .config import MilvusVectorIOConfig -async def get_provider_impl(config: MilvusVectorIOConfig, deps: Dict[Api, ProviderSpec]): +async def get_provider_impl(config: MilvusVectorIOConfig, deps: Dict[Api, Any]): from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusVectorIOAdapter impl = MilvusVectorIOAdapter(config, deps[Api.inference]) diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py index 5a2f07012..2380eb0ef 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict +from typing import Any, Dict -from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack.providers.datatypes import Api from .config import SQLiteVectorIOConfig -async def get_provider_impl(config: SQLiteVectorIOConfig, deps: Dict[Api, ProviderSpec]): +async def get_provider_impl(config: SQLiteVectorIOConfig, deps: Dict[Api, Any]): from .sqlite_vec import SQLiteVecVectorIOAdapter assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}" From d33b8ea3dc652fdb1c6a9c94e42c5e2dfe36eb7f Mon Sep 17 00:00:00 2001 From: Kelly Brown <86735520+kelbrown20@users.noreply.github.com> Date: Tue, 11 Mar 2025 13:12:18 -0400 Subject: [PATCH 05/11] docs: Small nits in llama CLI reference (#1542) **Description:** Fixes some small nits in the llama CLI reference Note: There are a few nits in this PR, but also has some small suggestions, feel free to close if not necessary --- .../references/llama_cli_reference/index.md | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/docs/source/references/llama_cli_reference/index.md b/docs/source/references/llama_cli_reference/index.md index 8a38fc3ae..7b7abdf88 100644 --- a/docs/source/references/llama_cli_reference/index.md +++ b/docs/source/references/llama_cli_reference/index.md @@ -1,6 +1,6 @@ # llama (server-side) CLI Reference -The `llama` CLI tool helps you setup and use the Llama Stack. It should be available on your path after installing the `llama-stack` package. +The `llama` CLI tool helps you set up and use the Llama Stack. 
The CLI is available on your path after installing the `llama-stack` package. ## Installation @@ -27,9 +27,9 @@ You have two ways to install Llama Stack: ## `llama` subcommands -1. `download`: `llama` cli tools supports downloading the model from Meta or Hugging Face. -2. `model`: Lists available models and their properties. -3. `stack`: Allows you to build and run a Llama Stack server. You can read more about this [here](../../distributions/building_distro). +1. `download`: Supports downloading models from Meta or Hugging Face. [Downloading models](#downloading-models) +2. `model`: Lists available models and their properties. [Understanding models](#understand-the-models) +3. `stack`: Allows you to build a stack using the `llama stack` distribution and run a Llama Stack server. You can read more about how to build a Llama Stack distribution in the [Build your own Distribution](../../distributions/building_distro) documentation. ### Sample Usage @@ -117,7 +117,7 @@ You should see a table like this: +----------------------------------+------------------------------------------+----------------+ ``` -To download models, you can use the llama download command. +To download models, you can use the `llama download` command. ### Downloading from [Meta](https://llama.meta.com/llama-downloads/) @@ -191,7 +191,7 @@ You should see a table like this: The `llama model` command helps you explore the model’s interface. 1. `download`: Download the model from different sources. (meta, huggingface) -2. `list`: Lists all the models available for download with hardware requirements to deploy the models. +2. `list`: Lists all the models available for download with hardware requirements for deploying the models. 3. `prompt-format`: Show llama model message formats. 4. `describe`: Describes all the properties of the model. @@ -262,13 +262,12 @@ llama model prompt-format -m Llama3.2-3B-Instruct ![alt text](../../../resources/prompt-format.png) - You will be shown a Markdown formatted description of the model interface and how prompts / messages are formatted for various scenarios. **NOTE**: Outputs in terminal are color printed to show special tokens. ### Remove model -You can run `llama model remove` to remove unecessary model: +You can run `llama model remove` to remove an unnecessary model: ``` llama model remove -m Llama-Guard-3-8B-int8 From aca82df7edfbbedfafd0f0db354ee4161e959fed Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Tue, 11 Mar 2025 13:30:55 -0400 Subject: [PATCH 06/11] fix: Multiple fixes for server shutdown (fix lifespan handling; fix handling CancelledError when raised by provider; let uvicorn handle signals) (#1495) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? If implementation raises CancelledError (e.g. when it runs its own async loop for jobs), the main server shutdown handler gets confused and doesn't attempt to shut down the main loop tasks. While at it, also fixing the following failure when this happens: ``` UnboundLocalError: cannot access local variable 'loop' where it is not associated with a value ``` Shutdown handlers were not running because lifespan logic was broken since ~Oct 2024. Fixed that too and enforcing `lifespan` now (making sure server will crash when it fails to interact with app through middleware). 
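To make the new flow concrete, here is a minimal sketch of the lifespan-driven shutdown pattern this PR moves to (stand-in impl objects, not the exact server code; the real server iterates its registered implementations):

```python
# Minimal sketch of lifespan-driven shutdown. FakeImpl is a stand-in
# for a real provider implementation; it simulates a provider that
# raises CancelledError from its own shutdown logic.
import asyncio
import logging
from contextlib import asynccontextmanager

from fastapi import FastAPI

logger = logging.getLogger(__name__)


class FakeImpl:
    async def shutdown(self) -> None:
        raise asyncio.CancelledError("Shutdown")


async def shutdown_impls(impls) -> None:
    for impl in impls:
        name = impl.__class__.__name__
        try:
            if hasattr(impl, "shutdown"):
                # Bound each provider's shutdown so one hang cannot stall exit.
                await asyncio.wait_for(impl.shutdown(), timeout=5)
            else:
                logger.warning("No shutdown method for %s", name)
        except asyncio.TimeoutError:
            logger.exception("Shutdown timeout for %s", name)
        except (Exception, asyncio.CancelledError):
            # CancelledError derives from BaseException, so it must be caught
            # explicitly here or it escapes and confuses the shutdown sequence.
            logger.exception("Failed to shutdown %s", name)


@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup happens before the yield, shutdown after; uvicorn drives
    # this when started with lifespan="on".
    yield
    await shutdown_impls(getattr(app, "impls", []))
```

Because uvicorn is now started with `lifespan="on"`, a failure in this handler surfaces as a server crash rather than being silently ignored.
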
[//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan Spotted while working on https://github.com/meta-llama/llama-stack/pull/1437 One way to trigger it without the PR above is to add `raise CancelledError` in any of the running providers' `shutdown` methods; then `kill -INT ` the server process. Validated this with the following test patch: ``` diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index b85c463a..10dad83e 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -174,6 +174,7 @@ def handle_signal(app, signum, _) -> None: except asyncio.CancelledError: pass finally: + logger.info("Stopping event loop") loop.stop() loop = asyncio.get_running_loop() diff --git a/llama_stack/providers/inline/post_training/torchtune/post_training.py b/llama_stack/providers/inline/post_training/torchtune/post_training.py index b837362d..163f43d8 100644 --- a/llama_stack/providers/inline/post_training/torchtune/post_training.py +++ b/llama_stack/providers/inline/post_training/torchtune/post_training.py @@ -3,6 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import asyncio from datetime import datetime from typing import Any, Dict, Optional @@ -43,6 +44,9 @@ class TorchtunePostTrainingImpl: self.jobs = {} self.checkpoints_dict = {} + async def shutdown(self) -> None: + raise asyncio.CancelledError("Shutdown") + async def supervised_fine_tune( self, job_uuid: str, ``` Without the fix: ``` INFO: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit) INFO: Shutting down INFO: Finished server process [52099] INFO 2025-03-07 23:25:33,548 __main__:143 server: Received signal SIGINT (2). Exiting gracefully... INFO 2025-03-07 23:25:33,550 __main__:150 server: Shutting down DatasetsRoutingTable INFO 2025-03-07 23:25:33,551 __main__:177 server: Stopping event loop ERROR 2025-03-07 23:25:33,552 asyncio:1785 uncategorized: unhandled exception during asyncio.run() shutdown task: .shutdown() done, defined at /home/ec2-user/src/llama-stack/schedule/llama_stack/distribution/server/server.py:145> exception=UnboundLocalError("cannot access local variable 'loop' where it is not associated with a value")> ╭───────────────────────────────────── Traceback (most recent call last) ─────────────────────────────────────╮ │ /home/ec2-user/src/llama-stack/schedule/llama_stack/distribution/server/server.py:178 in shutdown │ │ │ │ 175 │ │ │ pass │ │ 176 │ │ finally: │ │ 177 │ │ │ logger.info("Stopping event loop") │ │ ❱ 178 │ │ │ loop.stop() │ │ 179 │ │ │ 180 │ loop = asyncio.get_running_loop() │ │ 181 │ loop.create_task(shutdown()) │ ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ UnboundLocalError: cannot access local variable 'loop' where it is not associated with a value ``` With the fix, now seeing the following messages when the server is killed: ``` INFO: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit) INFO: Shutting down INFO: Finished server process [50836] INFO 2025-03-07 23:20:35,182 __main__:143 server: Received signal SIGINT (2). Exiting gracefully... 
INFO 2025-03-07 23:20:35,184 __main__:149 server: Shutting down DatasetsRoutingTable ERROR 2025-03-07 23:20:35,185 __main__:158 server: Failed to shutdown DatasetsRoutingTable: {CancelledError()} ╭───────────────────────────────────── Traceback (most recent call last) ─────────────────────────────────────╮ │ /usr/lib64/python3.11/asyncio/tasks.py:476 in wait_for │ │ │ │ 473 │ try: │ │ 474 │ │ # wait until the future completes or the timeout │ │ 475 │ │ try: │ │ ❱ 476 │ │ │ await waiter │ │ 477 │ │ except exceptions.CancelledError: │ │ 478 │ │ │ if fut.done(): │ │ 479 │ │ │ │ return fut.result() │ ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ CancelledError During handling of the above exception, another exception occurred: ╭───────────────────────────────────── Traceback (most recent call last) ─────────────────────────────────────╮ │ /home/ec2-user/src/llama-stack/schedule/llama_stack/distribution/server/server.py:152 in shutdown │ │ │ │ 149 │ │ │ logger.info("Shutting down %s", impl_name) │ │ 150 │ │ │ try: │ │ 151 │ │ │ │ if hasattr(impl, "shutdown"): │ │ ❱ 152 │ │ │ │ │ await asyncio.wait_for(impl.shutdown(), timeout=5) │ │ 153 │ │ │ │ else: │ │ 154 │ │ │ │ │ logger.warning("No shutdown method for %s", impl_name) │ │ 155 │ │ │ except asyncio.TimeoutError: │ │ │ │ /usr/lib64/python3.11/asyncio/tasks.py:479 in wait_for │ │ │ │ 476 │ │ │ await waiter │ │ 477 │ │ except exceptions.CancelledError: │ │ 478 │ │ │ if fut.done(): │ │ ❱ 479 │ │ │ │ return fut.result() │ │ 480 │ │ │ else: │ │ 481 │ │ │ │ fut.remove_done_callback(cb) │ │ 482 │ │ │ │ # We must ensure that the task is not running │ │ │ │ /home/ec2-user/src/llama-stack/schedule/llama_stack/distribution/routers/routing_tables.py:131 in shutdown │ │ │ │ 128 │ │ │ elif api == Api.tool_runtime: │ │ 129 │ │ │ │ p.tool_store = self │ │ 130 │ │ │ ❱ 131 │ async def shutdown(self) -> None: │ │ 132 │ │ for p in self.impls_by_provider_id.values(): │ │ 133 │ │ │ await p.shutdown() │ │ 134 │ ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ CancelledError INFO 2025-03-07 23:20:35,295 __main__:149 server: Shutting down DatasetIORouter INFO 2025-03-07 23:20:35,296 __main__:149 server: Shutting down ScoringFunctionsRoutingTable INFO 2025-03-07 23:20:35,297 __main__:149 server: Shutting down ScoringRouter INFO 2025-03-07 23:20:35,298 __main__:149 server: Shutting down ModelsRoutingTable INFO 2025-03-07 23:20:35,299 __main__:149 server: Shutting down InferenceRouter INFO 2025-03-07 23:20:35,300 __main__:149 server: Shutting down ShieldsRoutingTable INFO 2025-03-07 23:20:35,300 __main__:149 server: Shutting down SafetyRouter INFO 2025-03-07 23:20:35,301 __main__:149 server: Shutting down VectorDBsRoutingTable INFO 2025-03-07 23:20:35,302 __main__:149 server: Shutting down VectorIORouter INFO 2025-03-07 23:20:35,303 __main__:149 server: Shutting down ToolGroupsRoutingTable INFO 2025-03-07 23:20:35,304 __main__:149 server: Shutting down ToolRuntimeRouter INFO 2025-03-07 23:20:35,304 __main__:149 server: Shutting down MetaReferenceAgentsImpl INFO 2025-03-07 23:20:35,305 __main__:149 server: Shutting down TelemetryAdapter INFO 2025-03-07 23:20:35,306 __main__:149 server: Shutting down TorchtunePostTrainingImpl ERROR 2025-03-07 23:20:35,307 __main__:158 server: Failed to shutdown TorchtunePostTrainingImpl: {CancelledError('Shutdown')} ╭───────────────────────────────────── Traceback (most recent call last) 
─────────────────────────────────────╮ │ /home/ec2-user/src/llama-stack/schedule/llama_stack/distribution/server/server.py:152 in shutdown │ │ │ │ 149 │ │ │ logger.info("Shutting down %s", impl_name) │ │ 150 │ │ │ try: │ │ 151 │ │ │ │ if hasattr(impl, "shutdown"): │ │ ❱ 152 │ │ │ │ │ await asyncio.wait_for(impl.shutdown(), timeout=5) │ │ 153 │ │ │ │ else: │ │ 154 │ │ │ │ │ logger.warning("No shutdown method for %s", impl_name) │ │ 155 │ │ │ except asyncio.TimeoutError: │ │ │ │ /usr/lib64/python3.11/asyncio/tasks.py:489 in wait_for │ │ │ │ 486 │ │ │ │ raise │ │ 487 │ │ │ │ 488 │ │ if fut.done(): │ │ ❱ 489 │ │ │ return fut.result() │ │ 490 │ │ else: │ │ 491 │ │ │ fut.remove_done_callback(cb) │ │ 492 │ │ │ # We must ensure that the task is not running │ │ │ │ /home/ec2-user/src/llama-stack/schedule/llama_stack/providers/inline/post_training/torchtune/post_training. │ │ py:48 in shutdown │ │ │ │ 45 │ │ self.checkpoints_dict = {} │ │ 46 │ │ │ 47 │ async def shutdown(self) -> None: │ │ ❱ 48 │ │ raise asyncio.CancelledError("Shutdown") │ │ 49 │ │ │ 50 │ async def supervised_fine_tune( │ │ 51 │ │ self, │ ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ CancelledError: Shutdown INFO 2025-03-07 23:20:35,352 __main__:149 server: Shutting down BenchmarksRoutingTable INFO 2025-03-07 23:20:35,353 __main__:149 server: Shutting down EvalRouter INFO 2025-03-07 23:20:35,354 __main__:149 server: Shutting down DistributionInspectImpl INFO 2025-03-07 23:20:35,355 __main__:177 server: Stopping event loop Traceback (most recent call last): File "", line 198, in _run_module_as_main File "", line 88, in _run_code File "/home/ec2-user/src/llama-stack/schedule/llama_stack/distribution/server/server.py", line 488, in main() File "/home/ec2-user/src/llama-stack/schedule/llama_stack/distribution/server/server.py", line 476, in main uvicorn.run(**uvicorn_config) File "/home/ec2-user/src/llama-stack/schedule/venv/lib64/python3.11/site-packages/uvicorn/main.py", line 579, in run server.run() File "/home/ec2-user/src/llama-stack/schedule/venv/lib64/python3.11/site-packages/uvicorn/server.py", line 66, in run return asyncio.run(self.serve(sockets=sockets)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib64/python3.11/asyncio/runners.py", line 189, in run with Runner(debug=debug) as runner: File "/usr/lib64/python3.11/asyncio/runners.py", line 63, in __exit__ self.close() File "/usr/lib64/python3.11/asyncio/runners.py", line 71, in close _cancel_all_tasks(loop) File "/usr/lib64/python3.11/asyncio/runners.py", line 201, in _cancel_all_tasks loop.run_until_complete(tasks.gather(*to_cancel, return_exceptions=True)) File "/usr/lib64/python3.11/asyncio/base_events.py", line 652, in run_until_complete raise RuntimeError('Event loop stopped before Future completed.') RuntimeError: Event loop stopped before Future completed. ++ error_handler 104 ++ echo 'Error occurred in script at line: 104' Error occurred in script at line: 104 ++ exit 1 ``` With all patches included, the shutdown now looks as follows: ``` $ kill -INT $(ps ax | grep llama_stack.distribution.server.server | grep -v nvim | awk -e '{print $1}' | sort | head -n 1) ``` ``` 20:56:09.308 [START] INFO: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit) INFO: Shutting down INFO: Waiting for application shutdown. 
INFO 2025-03-10 20:56:43,961 __main__:140 server: Shutting down INFO 2025-03-10 20:56:43,962 __main__:124 server: Shutting down DatasetsRoutingTable INFO 2025-03-10 20:56:43,964 __main__:124 server: Shutting down DatasetIORouter INFO 2025-03-10 20:56:43,965 __main__:124 server: Shutting down ScoringFunctionsRoutingTable INFO 2025-03-10 20:56:43,966 __main__:124 server: Shutting down ScoringRouter INFO 2025-03-10 20:56:43,967 __main__:124 server: Shutting down ModelsRoutingTable INFO 2025-03-10 20:56:43,968 __main__:124 server: Shutting down InferenceRouter INFO 2025-03-10 20:56:43,969 __main__:124 server: Shutting down ShieldsRoutingTable INFO 2025-03-10 20:56:43,971 __main__:124 server: Shutting down SafetyRouter INFO 2025-03-10 20:56:43,972 __main__:124 server: Shutting down VectorDBsRoutingTable INFO 2025-03-10 20:56:43,973 __main__:124 server: Shutting down VectorIORouter INFO 2025-03-10 20:56:43,974 __main__:124 server: Shutting down ToolGroupsRoutingTable INFO 2025-03-10 20:56:43,975 __main__:124 server: Shutting down ToolRuntimeRouter INFO 2025-03-10 20:56:43,976 __main__:124 server: Shutting down MetaReferenceAgentsImpl INFO 2025-03-10 20:56:43,977 __main__:124 server: Shutting down TelemetryAdapter INFO 2025-03-10 20:56:43,978 __main__:124 server: Shutting down TorchtunePostTrainingImpl WARNING 2025-03-10 20:56:43,979 __main__:129 server: No shutdown method for TorchtunePostTrainingImpl INFO 2025-03-10 20:56:43,979 __main__:124 server: Shutting down BenchmarksRoutingTable INFO 2025-03-10 20:56:43,980 __main__:124 server: Shutting down EvalRouter INFO 2025-03-10 20:56:43,981 __main__:124 server: Shutting down DistributionInspectImpl INFO: Application shutdown complete. INFO: Finished server process [33862] ``` [//]: # (## Documentation) --------- Signed-off-by: Ihar Hrachyshka --- llama_stack/distribution/server/server.py | 87 +++++------------------ 1 file changed, 19 insertions(+), 68 deletions(-) diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index f819d446f..ea8723365 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -6,11 +6,9 @@ import argparse import asyncio -import functools import inspect import json import os -import signal import sys import traceback import warnings @@ -118,69 +116,24 @@ def translate_exception(exc: Exception) -> Union[HTTPException, RequestValidatio ) -def handle_signal(app, signum, _) -> None: +async def shutdown(app): + """Initiate a graceful shutdown of the application. + + Handled by the lifespan context manager. The shutdown process involves + shutting down all implementations registered in the application. """ - Handle incoming signals and initiate a graceful shutdown of the application. - - This function is intended to be used as a signal handler for various signals - (e.g., SIGINT, SIGTERM). Upon receiving a signal, it will print a message - indicating the received signal and initiate a shutdown process. - - Args: - app: The application instance containing implementations to be shut down. - signum (int): The signal number received. - frame: The current stack frame (not used in this function). - - The shutdown process involves: - - Shutting down all implementations registered in the application. - - Gathering all running asyncio tasks. - - Cancelling all gathered tasks. - - Waiting for all tasks to finish. - - Stopping the event loop. 
- - Note: - This function schedules the shutdown process as an asyncio task and does - not block the current execution. - """ - signame = signal.Signals(signum).name - logger.info(f"Received signal {signame} ({signum}). Exiting gracefully...") - - async def shutdown(): + for impl in app.__llama_stack_impls__.values(): + impl_name = impl.__class__.__name__ + logger.info("Shutting down %s", impl_name) try: - # Gracefully shut down implementations - for impl in app.__llama_stack_impls__.values(): - impl_name = impl.__class__.__name__ - logger.info("Shutting down %s", impl_name) - try: - if hasattr(impl, "shutdown"): - await asyncio.wait_for(impl.shutdown(), timeout=5) - else: - logger.warning("No shutdown method for %s", impl_name) - except asyncio.TimeoutError: - logger.exception("Shutdown timeout for %s ", impl_name, exc_info=True) - except Exception as e: - logger.exception("Failed to shutdown %s: %s", impl_name, {e}) - - # Gather all running tasks - loop = asyncio.get_running_loop() - tasks = [task for task in asyncio.all_tasks(loop) if task is not asyncio.current_task()] - - # Cancel all tasks - for task in tasks: - task.cancel() - - # Wait for all tasks to finish - try: - await asyncio.wait_for(asyncio.gather(*tasks, return_exceptions=True), timeout=10) - except asyncio.TimeoutError: - logger.exception("Timeout while waiting for tasks to finish") - except asyncio.CancelledError: - pass - finally: - loop.stop() - - loop = asyncio.get_running_loop() - loop.create_task(shutdown()) + if hasattr(impl, "shutdown"): + await asyncio.wait_for(impl.shutdown(), timeout=5) + else: + logger.warning("No shutdown method for %s", impl_name) + except asyncio.TimeoutError: + logger.exception("Shutdown timeout for %s ", impl_name, exc_info=True) + except (Exception, asyncio.CancelledError) as e: + logger.exception("Failed to shutdown %s: %s", impl_name, {e}) @asynccontextmanager @@ -188,8 +141,7 @@ async def lifespan(app: FastAPI): logger.info("Starting up") yield logger.info("Shutting down") - for impl in app.__llama_stack_impls__.values(): - await impl.shutdown() + await shutdown(app) def is_streaming_request(func_name: str, request: Request, **kwargs): @@ -266,7 +218,7 @@ class TracingMiddleware: self.app = app async def __call__(self, scope, receive, send): - path = scope["path"] + path = scope.get("path", "") await start_trace(path, {"__location__": "server"}) try: return await self.app(scope, receive, send) @@ -439,8 +391,6 @@ def main(): app.exception_handler(RequestValidationError)(global_exception_handler) app.exception_handler(Exception)(global_exception_handler) - signal.signal(signal.SIGINT, functools.partial(handle_signal, app)) - signal.signal(signal.SIGTERM, functools.partial(handle_signal, app)) app.__llama_stack_impls__ = impls @@ -471,6 +421,7 @@ def main(): "app": app, "host": listen_host, "port": port, + "lifespan": "on", } if ssl_config: uvicorn_config.update(ssl_config) From 83a2c78615a3b4a2ad96852023b0292a401a0463 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Tue, 11 Mar 2025 18:33:46 +0100 Subject: [PATCH 07/11] feat(api): list agents / sessions and get agent (#1410) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? Add support for listing agents, describing an agent, and retrieving session IDs for a given agent. This is only the API definition, the implementations will come separately. 
Closes: https://github.com/meta-llama/llama-stack/issues/1294 Signed-off-by: Sébastien Han --- docs/_static/llama-stack-spec.html | 169 +++++++++++++++++++++++++++++ docs/_static/llama-stack-spec.yaml | 118 ++++++++++++++++++++ llama_stack/apis/agents/agents.py | 46 ++++++++ 3 files changed, 333 insertions(+) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 1a8169090..b0febbbef 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -363,6 +363,37 @@ } }, "/v1/agents": { + "get": { + "responses": { + "200": { + "description": "A ListAgentsResponse.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListAgentsResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Agents" + ], + "description": "List all agents.", + "parameters": [] + }, "post": { "responses": { "200": { @@ -609,6 +640,47 @@ } }, "/v1/agents/{agent_id}": { + "get": { + "responses": { + "200": { + "description": "An Agent of the agent.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Agent" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Agents" + ], + "description": "Describe an agent by its ID.", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "description": "ID of the agent.", + "required": true, + "schema": { + "type": "string" + } + } + ] + }, "delete": { "responses": { "200": { @@ -2276,6 +2348,49 @@ ] } }, + "/v1/agents/{agent_id}/sessions": { + "get": { + "responses": { + "200": { + "description": "A ListAgentSessionsResponse.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListAgentSessionsResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Agents" + ], + "description": "List all session(s) of a given agent.", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "description": "The ID of the agent to list sessions for.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/eval/benchmarks": { "get": { "responses": { @@ -6565,6 +6680,28 @@ "title": "ScoringResult", "description": "A scoring result for a single row." 
}, + "Agent": { + "type": "object", + "properties": { + "agent_id": { + "type": "string" + }, + "agent_config": { + "$ref": "#/components/schemas/AgentConfig" + }, + "created_at": { + "type": "string", + "format": "date-time" + } + }, + "additionalProperties": false, + "required": [ + "agent_id", + "agent_config", + "created_at" + ], + "title": "Agent" + }, "Session": { "type": "object", "properties": { @@ -7907,6 +8044,38 @@ ], "title": "ToolInvocationResult" }, + "ListAgentSessionsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Session" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListAgentSessionsResponse" + }, + "ListAgentsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Agent" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListAgentsResponse" + }, "BucketResponse": { "type": "object", "properties": { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index d6001c00d..2985e6222 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -238,6 +238,28 @@ paths: $ref: '#/components/schemas/CompletionRequest' required: true /v1/agents: + get: + responses: + '200': + description: A ListAgentsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListAgentsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + description: List all agents. + parameters: [] post: responses: '200': @@ -410,6 +432,34 @@ paths: $ref: '#/components/schemas/CreateUploadSessionRequest' required: true /v1/agents/{agent_id}: + get: + responses: + '200': + description: An Agent of the agent. + content: + application/json: + schema: + $ref: '#/components/schemas/Agent' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + description: Describe an agent by its ID. + parameters: + - name: agent_id + in: path + description: ID of the agent. + required: true + schema: + type: string delete: responses: '200': @@ -1528,6 +1578,36 @@ paths: required: true schema: type: string + /v1/agents/{agent_id}/sessions: + get: + responses: + '200': + description: A ListAgentSessionsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListAgentSessionsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + description: List all session(s) of a given agent. + parameters: + - name: agent_id + in: path + description: >- + The ID of the agent to list sessions for. + required: true + schema: + type: string /v1/eval/benchmarks: get: responses: @@ -4549,6 +4629,22 @@ components: - aggregated_results title: ScoringResult description: A scoring result for a single row. 
+ Agent: + type: object + properties: + agent_id: + type: string + agent_config: + $ref: '#/components/schemas/AgentConfig' + created_at: + type: string + format: date-time + additionalProperties: false + required: + - agent_id + - agent_config + - created_at + title: Agent Session: type: object properties: @@ -5385,6 +5481,28 @@ components: required: - content title: ToolInvocationResult + ListAgentSessionsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Session' + additionalProperties: false + required: + - data + title: ListAgentSessionsResponse + ListAgentsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Agent' + additionalProperties: false + required: + - data + title: ListAgentsResponse BucketResponse: type: object properties: diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index af4b0ba77..1170a56d5 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -234,6 +234,23 @@ class AgentConfig(AgentConfigCommon): response_format: Optional[ResponseFormat] = None +@json_schema_type +class Agent(BaseModel): + agent_id: str + agent_config: AgentConfig + created_at: datetime + + +@json_schema_type +class ListAgentsResponse(BaseModel): + data: List[Agent] + + +@json_schema_type +class ListAgentSessionsResponse(BaseModel): + data: List[Session] + + class AgentConfigOverridablePerTurn(AgentConfigCommon): instructions: Optional[str] = None @@ -541,3 +558,32 @@ class Agents(Protocol): :param agent_id: The ID of the agent to delete. """ ... + + @webmethod(route="/agents", method="GET") + async def list_agents(self) -> ListAgentsResponse: + """List all agents. + + :returns: A ListAgentsResponse. + """ + ... + + @webmethod(route="/agents/{agent_id}", method="GET") + async def get_agent(self, agent_id: str) -> Agent: + """Describe an agent by its ID. + + :param agent_id: ID of the agent. + :returns: An Agent of the agent. + """ + ... + + @webmethod(route="/agents/{agent_id}/sessions", method="GET") + async def list_agent_sessions( + self, + agent_id: str, + ) -> ListAgentSessionsResponse: + """List all session(s) of a given agent. + + :param agent_id: The ID of the agent to list sessions for. + :returns: A ListAgentSessionsResponse. + """ + ... From b647ecd9ed9ecf433a6ce972a06e7a339fbf7ca6 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Tue, 11 Mar 2025 14:09:31 -0400 Subject: [PATCH 08/11] feat: add support for LLAMA_STACK_LOG_FILE (#1450) # What does this PR do? setting $LLAMA_STACK_LOG_FILE will pipe the logs to a file as well as stdout. this is done by using a logging FileHandler Signed-off-by: Charlie Doern --- docs/source/distributions/building_distro.md | 2 + llama_stack/log.py | 47 +++++++++++++------- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md index 942596b59..37a7e7974 100644 --- a/docs/source/distributions/building_distro.md +++ b/docs/source/distributions/building_distro.md @@ -33,6 +33,8 @@ Can be set to any of the following log levels: The default global log level is `info`. `all` sets the log level for all components. +A user can also set `LLAMA_STACK_LOG_FILE` which will pipe the logs to the specified path as well as to the terminal. 
An example would be: `export LLAMA_STACK_LOG_FILE=server.log` + ### Llama Stack Build In order to build your own distribution, we recommend you clone the `llama-stack` repository. diff --git a/llama_stack/log.py b/llama_stack/log.py index 9b9f5c5d8..80ee9fa1b 100644 --- a/llama_stack/log.py +++ b/llama_stack/log.py @@ -97,12 +97,13 @@ class CustomRichHandler(RichHandler): self.markup = original_markup -def setup_logging(category_levels: Dict[str, int]) -> None: +def setup_logging(category_levels: Dict[str, int], log_file: str | None) -> None: """ - Configure logging based on the provided category log levels. + Configure logging based on the provided category log levels and an optional log file. Parameters: category_levels (Dict[str, int]): A dictionary mapping categories to their log levels. + log_file (str): Path to a log file to additionally pipe the logs into """ log_format = "[dim]%(asctime)s %(name)s:%(lineno)d[/] [yellow dim]%(category)s[/]: %(message)s" @@ -117,6 +118,28 @@ def setup_logging(category_levels: Dict[str, int]) -> None: # Determine the root logger's level (default to WARNING if not specified) root_level = category_levels.get("root", logging.WARNING) + handlers = { + "console": { + "()": CustomRichHandler, # Use custom console handler + "formatter": "rich", + "rich_tracebacks": True, + "show_time": False, + "show_path": False, + "markup": True, + "filters": ["category_filter"], + } + } + + # Add a file handler if log_file is set + if log_file: + handlers["file"] = { + "class": "logging.FileHandler", + "formatter": "rich", + "filename": log_file, + "mode": "a", + "encoding": "utf-8", + } + logging_config = { "version": 1, "disable_existing_loggers": False, @@ -126,17 +149,7 @@ def setup_logging(category_levels: Dict[str, int]) -> None: "format": log_format, } }, - "handlers": { - "console": { - "()": CustomRichHandler, # Use our custom handler class - "formatter": "rich", - "rich_tracebacks": True, - "show_time": False, - "show_path": False, - "markup": True, - "filters": ["category_filter"], - } - }, + "handlers": handlers, "filters": { "category_filter": { "()": CategoryFilter, @@ -144,14 +157,14 @@ def setup_logging(category_levels: Dict[str, int]) -> None: }, "loggers": { category: { - "handlers": ["console"], + "handlers": list(handlers.keys()), # Apply all handlers "level": category_levels.get(category, DEFAULT_LOG_LEVEL), "propagate": False, # Disable propagation to root logger } for category in CATEGORIES }, "root": { - "handlers": ["console"], + "handlers": list(handlers.keys()), "level": root_level, # Set root logger's level dynamically }, } @@ -180,4 +193,6 @@ if env_config: cprint(f"Environment variable LLAMA_STACK_LOGGING found: {env_config}", "yellow") _category_levels.update(parse_environment_config(env_config)) -setup_logging(_category_levels) +log_file = os.environ.get("LLAMA_STACK_LOG_FILE") + +setup_logging(_category_levels, log_file) From 275bab1373f13704edf3cc29a94dd37af6a5dced Mon Sep 17 00:00:00 2001 From: Nathan Weinberg <31703736+nathan-weinberg@users.noreply.github.com> Date: Tue, 11 Mar 2025 14:11:32 -0400 Subject: [PATCH 09/11] test: loosen Python 3.10 version for unit tests (#1547) # What does this PR do? as I brought up in #1515 it shouldn't be nessessary to tie the unit test runner to an exact z-stream of Python 3.10 updated so unit test runner always uses latest z-stream of Python 3.10 ## Test Plan ```shell $ uv run -p 3.10 --with-editable . 
--with-editable ".[dev]" --with-editable ".[unit]" pytest --cov=llama_stack -s -v tests/unit/ --junitxml=pytest-report.xml ``` Signed-off-by: Nathan Weinberg --- .github/workflows/unit-tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 48658047f..3acfabe70 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -14,16 +14,16 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.10.16' + python-version: '3.10' - uses: astral-sh/setup-uv@v5 with: - python-version: '3.10.16' + python-version: '3.10' enable-cache: false - name: Run unit tests run: | - uv run -p 3.10.16 --with-editable . --with-editable ".[dev]" --with-editable ".[unit]" pytest --cov=llama_stack -s -v tests/unit/ --junitxml=pytest-report.xml + uv run -p 3.10 --with-editable . --with-editable ".[dev]" --with-editable ".[unit]" pytest --cov=llama_stack -s -v tests/unit/ --junitxml=pytest-report.xml - name: Upload test results if: always() From 85501ed8758a7b511cf972dfcb4c685ee849e368 Mon Sep 17 00:00:00 2001 From: Dinesh Yeduguru Date: Tue, 11 Mar 2025 11:19:29 -0700 Subject: [PATCH 10/11] fix: remove Llama-3.2-1B-Instruct for fireworks (#1558) # What does this PR do? remove Llama-3.2-1B-Instruct for fireworks as its no longer appears to be hosted on website. ## Test Plan python distro_codegen.py --- .../distributions/self_hosted_distro/fireworks.md | 1 - .../providers/remote/inference/fireworks/models.py | 4 ---- llama_stack/templates/ci-tests/run.yaml | 10 ---------- llama_stack/templates/dev/run.yaml | 10 ---------- llama_stack/templates/fireworks/run-with-safety.yaml | 10 ---------- llama_stack/templates/fireworks/run.yaml | 10 ---------- 6 files changed, 45 deletions(-) diff --git a/docs/source/distributions/self_hosted_distro/fireworks.md b/docs/source/distributions/self_hosted_distro/fireworks.md index 9592a18fe..3c8f5eec9 100644 --- a/docs/source/distributions/self_hosted_distro/fireworks.md +++ b/docs/source/distributions/self_hosted_distro/fireworks.md @@ -40,7 +40,6 @@ The following models are available by default: - `accounts/fireworks/models/llama-v3p1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)` - `accounts/fireworks/models/llama-v3p1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)` - `accounts/fireworks/models/llama-v3p1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` -- `accounts/fireworks/models/llama-v3p2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)` - `accounts/fireworks/models/llama-v3p2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` - `accounts/fireworks/models/llama-v3p2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` - `accounts/fireworks/models/llama-v3p2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` diff --git a/llama_stack/providers/remote/inference/fireworks/models.py b/llama_stack/providers/remote/inference/fireworks/models.py index c90f632ff..a0dc11768 100644 --- a/llama_stack/providers/remote/inference/fireworks/models.py +++ b/llama_stack/providers/remote/inference/fireworks/models.py @@ -24,10 +24,6 @@ MODEL_ENTRIES = [ "accounts/fireworks/models/llama-v3p1-405b-instruct", CoreModelId.llama3_1_405b_instruct.value, ), - build_hf_repo_model_entry( - "accounts/fireworks/models/llama-v3p2-1b-instruct", - CoreModelId.llama3_2_1b_instruct.value, - ), build_hf_repo_model_entry( 
"accounts/fireworks/models/llama-v3p2-3b-instruct", CoreModelId.llama3_2_3b_instruct.value, diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index 3a973cabf..715d7c86d 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -120,16 +120,6 @@ models: provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-1B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - model_type: llm - metadata: {} model_id: accounts/fireworks/models/llama-v3p2-3b-instruct provider_id: fireworks diff --git a/llama_stack/templates/dev/run.yaml b/llama_stack/templates/dev/run.yaml index 71fbcb353..f908af8c3 100644 --- a/llama_stack/templates/dev/run.yaml +++ b/llama_stack/templates/dev/run.yaml @@ -178,16 +178,6 @@ models: provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-1B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - model_type: llm - metadata: {} model_id: accounts/fireworks/models/llama-v3p2-3b-instruct provider_id: fireworks diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index 359bf0194..e04141a07 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -132,16 +132,6 @@ models: provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-1B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - model_type: llm - metadata: {} model_id: accounts/fireworks/models/llama-v3p2-3b-instruct provider_id: fireworks diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index 0ce3a4505..369b9ae7b 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -126,16 +126,6 @@ models: provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-1B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - model_type: llm - metadata: {} model_id: accounts/fireworks/models/llama-v3p2-3b-instruct provider_id: fireworks From 43044f29e2275bd6a15cd74b9cdb816f7049756f Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Tue, 11 Mar 2025 11:22:22 -0700 Subject: [PATCH 11/11] 
fix: fix llama stack run with missing agent impl (#1559) # What does this PR do? - recent merge https://github.com/meta-llama/llama-stack/pull/1410 introduce error ``` ValueError: Provider meta-reference (Api.agents) does not implement the following methods: [('list_agent_sessions', 'not_actually_implemented'), ('list_agents', 'not_actually_implemented')] ``` [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan ``` llama stack run ``` ``` LLAMA_STACK_CONFIG=fireworks pytest -v tests/integration/agents/test_agents.py --text-model meta-llama/Llama-3.3-70B-Instruct ``` https://github.com/meta-llama/llama-stack-ops/actions/runs/13795303869 [//]: # (## Documentation) --- .../inline/agents/meta_reference/agents.py | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index a46fa8eb7..c24b14e35 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -12,6 +12,7 @@ import uuid from typing import AsyncGenerator, List, Optional, Union from llama_stack.apis.agents import ( + Agent, AgentConfig, AgentCreateResponse, Agents, @@ -21,6 +22,8 @@ from llama_stack.apis.agents import ( AgentTurnCreateRequest, AgentTurnResumeRequest, Document, + ListAgentSessionsResponse, + ListAgentsResponse, Session, Turn, ) @@ -84,7 +87,7 @@ class MetaReferenceAgentsImpl(Agents): agent_id=agent_id, ) - async def get_agent(self, agent_id: str) -> ChatAgent: + async def _get_agent_impl(self, agent_id: str) -> ChatAgent: agent_config = await self.persistence_store.get( key=f"agent:{agent_id}", ) @@ -120,7 +123,7 @@ class MetaReferenceAgentsImpl(Agents): agent_id: str, session_name: str, ) -> AgentSessionCreateResponse: - agent = await self.get_agent(agent_id) + agent = await self._get_agent_impl(agent_id) session_id = await agent.create_session(session_name) return AgentSessionCreateResponse( @@ -160,7 +163,7 @@ class MetaReferenceAgentsImpl(Agents): self, request: AgentTurnCreateRequest, ) -> AsyncGenerator: - agent = await self.get_agent(request.agent_id) + agent = await self._get_agent_impl(request.agent_id) async for event in agent.create_and_execute_turn(request): yield event @@ -188,12 +191,12 @@ class MetaReferenceAgentsImpl(Agents): self, request: AgentTurnResumeRequest, ) -> AsyncGenerator: - agent = await self.get_agent(request.agent_id) + agent = await self._get_agent_impl(request.agent_id) async for event in agent.resume_turn(request): yield event async def get_agents_turn(self, agent_id: str, session_id: str, turn_id: str) -> Turn: - agent = await self.get_agent(agent_id) + agent = await self._get_agent_impl(agent_id) turn = await agent.storage.get_session_turn(session_id, turn_id) return turn @@ -210,7 +213,7 @@ class MetaReferenceAgentsImpl(Agents): session_id: str, turn_ids: Optional[List[str]] = None, ) -> Session: - agent = await self.get_agent(agent_id) + agent = await self._get_agent_impl(agent_id) session_info = await agent.storage.get_session_info(session_id) if session_info is None: raise ValueError(f"Session {session_id} not found") @@ -232,3 +235,15 @@ class MetaReferenceAgentsImpl(Agents): async def shutdown(self) -> None: pass + + async def list_agents(self) -> ListAgentsResponse: + pass + + async def get_agent(self, agent_id: str) -> Agent: + pass + + async def list_agent_sessions( + self, + 
+        agent_id: str,
+    ) -> ListAgentSessionsResponse:
+        pass
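
The three GET routes added earlier in this series are now registered, but the meta-reference provider above only stubs them out, so they return empty bodies until the stubs are filled in. For readers who want to poke at the new API surface anyway, here is a minimal client-side sketch. It is illustrative only and not part of the patches: the base URL assumes a locally running stack on the default port 8321, and `httpx` is an arbitrary choice of HTTP client.

```python
# Illustrative sketch: walk the read-only Agents routes added in this series.
# Assumes a running Llama Stack server at localhost:8321 (the default port)
# and a provider that actually implements list_agents / get_agent /
# list_agent_sessions (the meta-reference stubs above do not yet).
import httpx

with httpx.Client(base_url="http://localhost:8321") as client:
    # GET /v1/agents -> ListAgentsResponse: {"data": [Agent, ...]}
    agents = client.get("/v1/agents").json()["data"]

    for agent in agents:
        agent_id = agent["agent_id"]

        # GET /v1/agents/{agent_id} -> the Agent schema
        # (agent_id, agent_config, created_at)
        detail = client.get(f"/v1/agents/{agent_id}").json()
        print(detail["agent_id"], detail["created_at"])

        # GET /v1/agents/{agent_id}/sessions -> ListAgentSessionsResponse:
        # {"data": [Session, ...]}
        sessions = client.get(f"/v1/agents/{agent_id}/sessions").json()["data"]
        print(f"  {len(sessions)} session(s)")
```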