diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 48658047f..3acfabe70 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -14,16 +14,16 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.10.16' + python-version: '3.10' - uses: astral-sh/setup-uv@v5 with: - python-version: '3.10.16' + python-version: '3.10' enable-cache: false - name: Run unit tests run: | - uv run -p 3.10.16 --with-editable . --with-editable ".[dev]" --with-editable ".[unit]" pytest --cov=llama_stack -s -v tests/unit/ --junitxml=pytest-report.xml + uv run -p 3.10 --with-editable . --with-editable ".[dev]" --with-editable ".[unit]" pytest --cov=llama_stack -s -v tests/unit/ --junitxml=pytest-report.xml - name: Upload test results if: always() diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e639328f0..7c0b5d94e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -159,8 +159,7 @@ uv run sphinx-autobuild source build/html --write-all If you modify or add new API endpoints, update the API documentation accordingly. You can do this by running the following command: ```bash -uv sync --extra dev -uv run ./docs/openapi_generator/run_openapi_generator.sh +uv run --with ".[dev]" ./docs/openapi_generator/run_openapi_generator.sh ``` The generated API documentation will be available in `docs/_static/`. Make sure to review the changes before committing. diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 102198184..05e950605 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -363,6 +363,37 @@ } }, "/v1/agents": { + "get": { + "responses": { + "200": { + "description": "A ListAgentsResponse.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListAgentsResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Agents" + ], + "description": "List all agents.", + "parameters": [] + }, "post": { "responses": { "200": { @@ -609,6 +640,47 @@ } }, "/v1/agents/{agent_id}": { + "get": { + "responses": { + "200": { + "description": "An Agent of the agent.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Agent" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Agents" + ], + "description": "Describe an agent by its ID.", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "description": "ID of the agent.", + "required": true, + "schema": { + "type": "string" + } + } + ] + }, "delete": { "responses": { "200": { @@ -2358,6 +2430,49 @@ ] } }, + "/v1/agents/{agent_id}/sessions": { + "get": { + "responses": { + "200": { + "description": "A ListAgentSessionsResponse.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListAgentSessionsResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Agents" + ], + "description": "List all session(s) of a given agent.", + "parameters": [ + { + "name": "agent_id", + "in": "path", + "description": "The ID of the agent to list sessions for.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/eval/benchmarks": { "get": { "responses": { @@ -6776,6 +6891,28 @@ "title": "ScoringResult", "description": "A scoring result for a single row." }, + "Agent": { + "type": "object", + "properties": { + "agent_id": { + "type": "string" + }, + "agent_config": { + "$ref": "#/components/schemas/AgentConfig" + }, + "created_at": { + "type": "string", + "format": "date-time" + } + }, + "additionalProperties": false, + "required": [ + "agent_id", + "agent_config", + "created_at" + ], + "title": "Agent" + }, "Session": { "type": "object", "properties": { @@ -8214,6 +8351,38 @@ ], "title": "ToolInvocationResult" }, + "ListAgentSessionsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Session" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListAgentSessionsResponse" + }, + "ListAgentsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Agent" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListAgentsResponse" + }, "BucketResponse": { "type": "object", "properties": { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index e235d8ee7..4660436c3 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -238,6 +238,28 @@ paths: $ref: '#/components/schemas/CompletionRequest' required: true /v1/agents: + get: + responses: + '200': + description: A ListAgentsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListAgentsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + description: List all agents. + parameters: [] post: responses: '200': @@ -410,6 +432,34 @@ paths: $ref: '#/components/schemas/CreateUploadSessionRequest' required: true /v1/agents/{agent_id}: + get: + responses: + '200': + description: An Agent of the agent. + content: + application/json: + schema: + $ref: '#/components/schemas/Agent' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + description: Describe an agent by its ID. + parameters: + - name: agent_id + in: path + description: ID of the agent. + required: true + schema: + type: string delete: responses: '200': @@ -1581,6 +1631,36 @@ paths: required: true schema: type: string + /v1/agents/{agent_id}/sessions: + get: + responses: + '200': + description: A ListAgentSessionsResponse. + content: + application/json: + schema: + $ref: '#/components/schemas/ListAgentSessionsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Agents + description: List all session(s) of a given agent. + parameters: + - name: agent_id + in: path + description: >- + The ID of the agent to list sessions for. + required: true + schema: + type: string /v1/eval/benchmarks: get: responses: @@ -4690,6 +4770,22 @@ components: - aggregated_results title: ScoringResult description: A scoring result for a single row. + Agent: + type: object + properties: + agent_id: + type: string + agent_config: + $ref: '#/components/schemas/AgentConfig' + created_at: + type: string + format: date-time + additionalProperties: false + required: + - agent_id + - agent_config + - created_at + title: Agent Session: type: object properties: @@ -5579,6 +5675,28 @@ components: required: - content title: ToolInvocationResult + ListAgentSessionsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Session' + additionalProperties: false + required: + - data + title: ListAgentSessionsResponse + ListAgentsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Agent' + additionalProperties: false + required: + - data + title: ListAgentsResponse BucketResponse: type: object properties: diff --git a/docs/openapi_generator/README.md b/docs/openapi_generator/README.md index 298df3ce0..7888e7828 100644 --- a/docs/openapi_generator/README.md +++ b/docs/openapi_generator/README.md @@ -1,9 +1 @@ The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack/distribution/server/endpoints.py` using the `generate.py` utility. - -Please install the following packages before running the script: - -``` -pip install fire PyYAML -``` - -Then simply run `sh run_openapi_generator.sh` diff --git a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md index 942596b59..37a7e7974 100644 --- a/docs/source/distributions/building_distro.md +++ b/docs/source/distributions/building_distro.md @@ -33,6 +33,8 @@ Can be set to any of the following log levels: The default global log level is `info`. `all` sets the log level for all components. +A user can also set `LLAMA_STACK_LOG_FILE` which will pipe the logs to the specified path as well as to the terminal. An example would be: `export LLAMA_STACK_LOG_FILE=server.log` + ### Llama Stack Build In order to build your own distribution, we recommend you clone the `llama-stack` repository. diff --git a/docs/source/distributions/self_hosted_distro/fireworks.md b/docs/source/distributions/self_hosted_distro/fireworks.md index 99c75f166..0f94df207 100644 --- a/docs/source/distributions/self_hosted_distro/fireworks.md +++ b/docs/source/distributions/self_hosted_distro/fireworks.md @@ -41,7 +41,6 @@ The following models are available by default: - `accounts/fireworks/models/llama-v3p1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)` - `accounts/fireworks/models/llama-v3p1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)` - `accounts/fireworks/models/llama-v3p1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` -- `accounts/fireworks/models/llama-v3p2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)` - `accounts/fireworks/models/llama-v3p2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` - `accounts/fireworks/models/llama-v3p2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` - `accounts/fireworks/models/llama-v3p2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` diff --git a/docs/source/references/llama_cli_reference/index.md b/docs/source/references/llama_cli_reference/index.md index 8a38fc3ae..7b7abdf88 100644 --- a/docs/source/references/llama_cli_reference/index.md +++ b/docs/source/references/llama_cli_reference/index.md @@ -1,6 +1,6 @@ # llama (server-side) CLI Reference -The `llama` CLI tool helps you setup and use the Llama Stack. It should be available on your path after installing the `llama-stack` package. +The `llama` CLI tool helps you set up and use the Llama Stack. The CLI is available on your path after installing the `llama-stack` package. ## Installation @@ -27,9 +27,9 @@ You have two ways to install Llama Stack: ## `llama` subcommands -1. `download`: `llama` cli tools supports downloading the model from Meta or Hugging Face. -2. `model`: Lists available models and their properties. -3. `stack`: Allows you to build and run a Llama Stack server. You can read more about this [here](../../distributions/building_distro). +1. `download`: Supports downloading models from Meta or Hugging Face. [Downloading models](#downloading-models) +2. `model`: Lists available models and their properties. [Understanding models](#understand-the-models) +3. `stack`: Allows you to build a stack using the `llama stack` distribution and run a Llama Stack server. You can read more about how to build a Llama Stack distribution in the [Build your own Distribution](../../distributions/building_distro) documentation. ### Sample Usage @@ -117,7 +117,7 @@ You should see a table like this: +----------------------------------+------------------------------------------+----------------+ ``` -To download models, you can use the llama download command. +To download models, you can use the `llama download` command. ### Downloading from [Meta](https://llama.meta.com/llama-downloads/) @@ -191,7 +191,7 @@ You should see a table like this: The `llama model` command helps you explore the model’s interface. 1. `download`: Download the model from different sources. (meta, huggingface) -2. `list`: Lists all the models available for download with hardware requirements to deploy the models. +2. `list`: Lists all the models available for download with hardware requirements for deploying the models. 3. `prompt-format`: Show llama model message formats. 4. `describe`: Describes all the properties of the model. @@ -262,13 +262,12 @@ llama model prompt-format -m Llama3.2-3B-Instruct ![alt text](../../../resources/prompt-format.png) - You will be shown a Markdown formatted description of the model interface and how prompts / messages are formatted for various scenarios. **NOTE**: Outputs in terminal are color printed to show special tokens. ### Remove model -You can run `llama model remove` to remove unecessary model: +You can run `llama model remove` to remove an unnecessary model: ``` llama model remove -m Llama-Guard-3-8B-int8 diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index af4b0ba77..1170a56d5 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -234,6 +234,23 @@ class AgentConfig(AgentConfigCommon): response_format: Optional[ResponseFormat] = None +@json_schema_type +class Agent(BaseModel): + agent_id: str + agent_config: AgentConfig + created_at: datetime + + +@json_schema_type +class ListAgentsResponse(BaseModel): + data: List[Agent] + + +@json_schema_type +class ListAgentSessionsResponse(BaseModel): + data: List[Session] + + class AgentConfigOverridablePerTurn(AgentConfigCommon): instructions: Optional[str] = None @@ -541,3 +558,32 @@ class Agents(Protocol): :param agent_id: The ID of the agent to delete. """ ... + + @webmethod(route="/agents", method="GET") + async def list_agents(self) -> ListAgentsResponse: + """List all agents. + + :returns: A ListAgentsResponse. + """ + ... + + @webmethod(route="/agents/{agent_id}", method="GET") + async def get_agent(self, agent_id: str) -> Agent: + """Describe an agent by its ID. + + :param agent_id: ID of the agent. + :returns: An Agent of the agent. + """ + ... + + @webmethod(route="/agents/{agent_id}/sessions", method="GET") + async def list_agent_sessions( + self, + agent_id: str, + ) -> ListAgentSessionsResponse: + """List all session(s) of a given agent. + + :param agent_id: The ID of the agent to list sessions for. + :returns: A ListAgentSessionsResponse. + """ + ... diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py index 1e4f3c5d9..e5686fb10 100644 --- a/llama_stack/cli/stack/run.py +++ b/llama_stack/cli/stack/run.py @@ -56,6 +56,7 @@ class StackRun(Subcommand): "--env", action="append", help="Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times.", + default=[], metavar="KEY=VALUE", ) self.parser.add_argument( @@ -73,6 +74,7 @@ class StackRun(Subcommand): type=str, help="Image Type used during the build. This can be either conda or container or venv.", choices=["conda", "container", "venv"], + default="conda", ) def _run_stack_run_cmd(self, args: argparse.Namespace) -> None: @@ -118,42 +120,20 @@ class StackRun(Subcommand): except AttributeError as e: self.parser.error(f"failed to parse config file '{config_file}':\n {e}") - # If neither image type nor image name is provided, assume the server should be run directly - # using the current environment packages. - if not args.image_type and not args.image_name: - logger.info("No image type or image name provided. Assuming environment packages.") - from llama_stack.distribution.server.server import main as server_main + run_args = formulate_run_args(args.image_type, args.image_name, config, template_name) - # Build the server args from the current args passed to the CLI - server_args = argparse.Namespace() - for arg in vars(args): - # If this is a function, avoid passing it - # "args" contains: - # func=> - if callable(getattr(args, arg)): - continue - setattr(server_args, arg, getattr(args, arg)) + run_args.extend([str(config_file), str(args.port)]) + if args.disable_ipv6: + run_args.append("--disable-ipv6") - # Run the server - server_main(server_args) - else: - run_args = formulate_run_args(args.image_type, args.image_name, config, template_name) + for env_var in args.env: + if "=" not in env_var: + self.parser.error(f"Environment variable '{env_var}' must be in KEY=VALUE format") + key, value = env_var.split("=", 1) # split on first = only + if not key: + self.parser.error(f"Environment variable '{env_var}' has empty key") + run_args.extend(["--env", f"{key}={value}"]) - run_args.extend([str(config_file), str(args.port)]) - if args.disable_ipv6: - run_args.append("--disable-ipv6") - - if args.env: - for env_var in args.env: - if "=" not in env_var: - self.parser.error(f"Environment variable '{env_var}' must be in KEY=VALUE format") - return - key, value = env_var.split("=", 1) # split on first = only - if not key: - self.parser.error(f"Environment variable '{env_var}' has empty key") - return - run_args.extend(["--env", f"{key}={value}"]) - - if args.tls_keyfile and args.tls_certfile: - run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile]) - run_with_pty(run_args) + if args.tls_keyfile and args.tls_certfile: + run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile]) + run_with_pty(run_args) diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index 6b99d908d..ea8723365 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -6,18 +6,16 @@ import argparse import asyncio -import functools import inspect import json import os -import signal import sys import traceback import warnings from contextlib import asynccontextmanager from importlib.metadata import version as parse_version from pathlib import Path -from typing import Any, List, Optional, Union +from typing import Any, List, Union import yaml from fastapi import Body, FastAPI, HTTPException, Request @@ -118,69 +116,24 @@ def translate_exception(exc: Exception) -> Union[HTTPException, RequestValidatio ) -def handle_signal(app, signum, _) -> None: +async def shutdown(app): + """Initiate a graceful shutdown of the application. + + Handled by the lifespan context manager. The shutdown process involves + shutting down all implementations registered in the application. """ - Handle incoming signals and initiate a graceful shutdown of the application. - - This function is intended to be used as a signal handler for various signals - (e.g., SIGINT, SIGTERM). Upon receiving a signal, it will print a message - indicating the received signal and initiate a shutdown process. - - Args: - app: The application instance containing implementations to be shut down. - signum (int): The signal number received. - frame: The current stack frame (not used in this function). - - The shutdown process involves: - - Shutting down all implementations registered in the application. - - Gathering all running asyncio tasks. - - Cancelling all gathered tasks. - - Waiting for all tasks to finish. - - Stopping the event loop. - - Note: - This function schedules the shutdown process as an asyncio task and does - not block the current execution. - """ - signame = signal.Signals(signum).name - logger.info(f"Received signal {signame} ({signum}). Exiting gracefully...") - - async def shutdown(): + for impl in app.__llama_stack_impls__.values(): + impl_name = impl.__class__.__name__ + logger.info("Shutting down %s", impl_name) try: - # Gracefully shut down implementations - for impl in app.__llama_stack_impls__.values(): - impl_name = impl.__class__.__name__ - logger.info("Shutting down %s", impl_name) - try: - if hasattr(impl, "shutdown"): - await asyncio.wait_for(impl.shutdown(), timeout=5) - else: - logger.warning("No shutdown method for %s", impl_name) - except asyncio.TimeoutError: - logger.exception("Shutdown timeout for %s ", impl_name, exc_info=True) - except Exception as e: - logger.exception("Failed to shutdown %s: %s", impl_name, {e}) - - # Gather all running tasks - loop = asyncio.get_running_loop() - tasks = [task for task in asyncio.all_tasks(loop) if task is not asyncio.current_task()] - - # Cancel all tasks - for task in tasks: - task.cancel() - - # Wait for all tasks to finish - try: - await asyncio.wait_for(asyncio.gather(*tasks, return_exceptions=True), timeout=10) - except asyncio.TimeoutError: - logger.exception("Timeout while waiting for tasks to finish") - except asyncio.CancelledError: - pass - finally: - loop.stop() - - loop = asyncio.get_running_loop() - loop.create_task(shutdown()) + if hasattr(impl, "shutdown"): + await asyncio.wait_for(impl.shutdown(), timeout=5) + else: + logger.warning("No shutdown method for %s", impl_name) + except asyncio.TimeoutError: + logger.exception("Shutdown timeout for %s ", impl_name, exc_info=True) + except (Exception, asyncio.CancelledError) as e: + logger.exception("Failed to shutdown %s: %s", impl_name, {e}) @asynccontextmanager @@ -188,8 +141,7 @@ async def lifespan(app: FastAPI): logger.info("Starting up") yield logger.info("Shutting down") - for impl in app.__llama_stack_impls__.values(): - await impl.shutdown() + await shutdown(app) def is_streaming_request(func_name: str, request: Request, **kwargs): @@ -266,7 +218,7 @@ class TracingMiddleware: self.app = app async def __call__(self, scope, receive, send): - path = scope["path"] + path = scope.get("path", "") await start_trace(path, {"__location__": "server"}) try: return await self.app(scope, receive, send) @@ -314,17 +266,11 @@ class ClientVersionMiddleware: return await self.app(scope, receive, send) -def main(args: Optional[argparse.Namespace] = None): +def main(): """Start the LlamaStack server.""" parser = argparse.ArgumentParser(description="Start the LlamaStack server.") parser.add_argument( "--yaml-config", - dest="config", - help="(Deprecated) Path to YAML configuration file - use --config instead", - ) - parser.add_argument( - "--config", - dest="config", help="Path to YAML configuration file", ) parser.add_argument( @@ -354,19 +300,7 @@ def main(args: Optional[argparse.Namespace] = None): required="--tls-keyfile" in sys.argv, ) - # Determine whether the server args are being passed by the "run" command, if this is the case - # the args will be passed as a Namespace object to the main function, otherwise they will be - # parsed from the command line - if args is None: - args = parser.parse_args() - - # Check for deprecated argument usage - if "--yaml-config" in sys.argv: - warnings.warn( - "The '--yaml-config' argument is deprecated and will be removed in a future version. Use '--config' instead.", - DeprecationWarning, - stacklevel=2, - ) + args = parser.parse_args() if args.env: for env_pair in args.env: @@ -378,9 +312,9 @@ def main(args: Optional[argparse.Namespace] = None): logger.error(f"Error: {str(e)}") sys.exit(1) - if args.config: + if args.yaml_config: # if the user provided a config file, use it, even if template was specified - config_file = Path(args.config) + config_file = Path(args.yaml_config) if not config_file.exists(): raise ValueError(f"Config file {config_file} does not exist") logger.info(f"Using config file: {config_file}") @@ -457,8 +391,6 @@ def main(args: Optional[argparse.Namespace] = None): app.exception_handler(RequestValidationError)(global_exception_handler) app.exception_handler(Exception)(global_exception_handler) - signal.signal(signal.SIGINT, functools.partial(handle_signal, app)) - signal.signal(signal.SIGTERM, functools.partial(handle_signal, app)) app.__llama_stack_impls__ = impls @@ -489,6 +421,7 @@ def main(args: Optional[argparse.Namespace] = None): "app": app, "host": listen_host, "port": port, + "lifespan": "on", } if ssl_config: uvicorn_config.update(ssl_config) diff --git a/llama_stack/log.py b/llama_stack/log.py index 9b9f5c5d8..80ee9fa1b 100644 --- a/llama_stack/log.py +++ b/llama_stack/log.py @@ -97,12 +97,13 @@ class CustomRichHandler(RichHandler): self.markup = original_markup -def setup_logging(category_levels: Dict[str, int]) -> None: +def setup_logging(category_levels: Dict[str, int], log_file: str | None) -> None: """ - Configure logging based on the provided category log levels. + Configure logging based on the provided category log levels and an optional log file. Parameters: category_levels (Dict[str, int]): A dictionary mapping categories to their log levels. + log_file (str): Path to a log file to additionally pipe the logs into """ log_format = "[dim]%(asctime)s %(name)s:%(lineno)d[/] [yellow dim]%(category)s[/]: %(message)s" @@ -117,6 +118,28 @@ def setup_logging(category_levels: Dict[str, int]) -> None: # Determine the root logger's level (default to WARNING if not specified) root_level = category_levels.get("root", logging.WARNING) + handlers = { + "console": { + "()": CustomRichHandler, # Use custom console handler + "formatter": "rich", + "rich_tracebacks": True, + "show_time": False, + "show_path": False, + "markup": True, + "filters": ["category_filter"], + } + } + + # Add a file handler if log_file is set + if log_file: + handlers["file"] = { + "class": "logging.FileHandler", + "formatter": "rich", + "filename": log_file, + "mode": "a", + "encoding": "utf-8", + } + logging_config = { "version": 1, "disable_existing_loggers": False, @@ -126,17 +149,7 @@ def setup_logging(category_levels: Dict[str, int]) -> None: "format": log_format, } }, - "handlers": { - "console": { - "()": CustomRichHandler, # Use our custom handler class - "formatter": "rich", - "rich_tracebacks": True, - "show_time": False, - "show_path": False, - "markup": True, - "filters": ["category_filter"], - } - }, + "handlers": handlers, "filters": { "category_filter": { "()": CategoryFilter, @@ -144,14 +157,14 @@ def setup_logging(category_levels: Dict[str, int]) -> None: }, "loggers": { category: { - "handlers": ["console"], + "handlers": list(handlers.keys()), # Apply all handlers "level": category_levels.get(category, DEFAULT_LOG_LEVEL), "propagate": False, # Disable propagation to root logger } for category in CATEGORIES }, "root": { - "handlers": ["console"], + "handlers": list(handlers.keys()), "level": root_level, # Set root logger's level dynamically }, } @@ -180,4 +193,6 @@ if env_config: cprint(f"Environment variable LLAMA_STACK_LOGGING found: {env_config}", "yellow") _category_levels.update(parse_environment_config(env_config)) -setup_logging(_category_levels) +log_file = os.environ.get("LLAMA_STACK_LOG_FILE") + +setup_logging(_category_levels, log_file) diff --git a/llama_stack/providers/inline/agents/meta_reference/__init__.py b/llama_stack/providers/inline/agents/meta_reference/__init__.py index 8f8c24170..4be064f1d 100644 --- a/llama_stack/providers/inline/agents/meta_reference/__init__.py +++ b/llama_stack/providers/inline/agents/meta_reference/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict +from typing import Any, Dict -from llama_stack.distribution.datatypes import Api, ProviderSpec +from llama_stack.distribution.datatypes import Api from .config import MetaReferenceAgentsImplConfig -async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: Dict[Api, ProviderSpec]): +async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: Dict[Api, Any]): from .agents import MetaReferenceAgentsImpl impl = MetaReferenceAgentsImpl( diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index a46fa8eb7..c24b14e35 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -12,6 +12,7 @@ import uuid from typing import AsyncGenerator, List, Optional, Union from llama_stack.apis.agents import ( + Agent, AgentConfig, AgentCreateResponse, Agents, @@ -21,6 +22,8 @@ from llama_stack.apis.agents import ( AgentTurnCreateRequest, AgentTurnResumeRequest, Document, + ListAgentSessionsResponse, + ListAgentsResponse, Session, Turn, ) @@ -84,7 +87,7 @@ class MetaReferenceAgentsImpl(Agents): agent_id=agent_id, ) - async def get_agent(self, agent_id: str) -> ChatAgent: + async def _get_agent_impl(self, agent_id: str) -> ChatAgent: agent_config = await self.persistence_store.get( key=f"agent:{agent_id}", ) @@ -120,7 +123,7 @@ class MetaReferenceAgentsImpl(Agents): agent_id: str, session_name: str, ) -> AgentSessionCreateResponse: - agent = await self.get_agent(agent_id) + agent = await self._get_agent_impl(agent_id) session_id = await agent.create_session(session_name) return AgentSessionCreateResponse( @@ -160,7 +163,7 @@ class MetaReferenceAgentsImpl(Agents): self, request: AgentTurnCreateRequest, ) -> AsyncGenerator: - agent = await self.get_agent(request.agent_id) + agent = await self._get_agent_impl(request.agent_id) async for event in agent.create_and_execute_turn(request): yield event @@ -188,12 +191,12 @@ class MetaReferenceAgentsImpl(Agents): self, request: AgentTurnResumeRequest, ) -> AsyncGenerator: - agent = await self.get_agent(request.agent_id) + agent = await self._get_agent_impl(request.agent_id) async for event in agent.resume_turn(request): yield event async def get_agents_turn(self, agent_id: str, session_id: str, turn_id: str) -> Turn: - agent = await self.get_agent(agent_id) + agent = await self._get_agent_impl(agent_id) turn = await agent.storage.get_session_turn(session_id, turn_id) return turn @@ -210,7 +213,7 @@ class MetaReferenceAgentsImpl(Agents): session_id: str, turn_ids: Optional[List[str]] = None, ) -> Session: - agent = await self.get_agent(agent_id) + agent = await self._get_agent_impl(agent_id) session_info = await agent.storage.get_session_info(session_id) if session_info is None: raise ValueError(f"Session {session_id} not found") @@ -232,3 +235,15 @@ class MetaReferenceAgentsImpl(Agents): async def shutdown(self) -> None: pass + + async def list_agents(self) -> ListAgentsResponse: + pass + + async def get_agent(self, agent_id: str) -> Agent: + pass + + async def list_agent_sessions( + self, + agent_id: str, + ) -> ListAgentSessionsResponse: + pass diff --git a/llama_stack/providers/inline/datasetio/localfs/__init__.py b/llama_stack/providers/inline/datasetio/localfs/__init__.py index db8aa555c..5a0876d79 100644 --- a/llama_stack/providers/inline/datasetio/localfs/__init__.py +++ b/llama_stack/providers/inline/datasetio/localfs/__init__.py @@ -4,12 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from typing import Any, Dict + from .config import LocalFSDatasetIOConfig async def get_provider_impl( config: LocalFSDatasetIOConfig, - _deps, + _deps: Dict[str, Any], ): from .datasetio import LocalFSDatasetIOImpl diff --git a/llama_stack/providers/inline/eval/meta_reference/__init__.py b/llama_stack/providers/inline/eval/meta_reference/__init__.py index 56c115322..e2a7fc2cd 100644 --- a/llama_stack/providers/inline/eval/meta_reference/__init__.py +++ b/llama_stack/providers/inline/eval/meta_reference/__init__.py @@ -3,16 +3,16 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict +from typing import Any, Dict -from llama_stack.distribution.datatypes import Api, ProviderSpec +from llama_stack.distribution.datatypes import Api from .config import MetaReferenceEvalConfig async def get_provider_impl( config: MetaReferenceEvalConfig, - deps: Dict[Api, ProviderSpec], + deps: Dict[Api, Any], ): from .eval import MetaReferenceEvalImpl diff --git a/llama_stack/providers/inline/inference/meta_reference/__init__.py b/llama_stack/providers/inline/inference/meta_reference/__init__.py index 9c923490d..3ef7cfd45 100644 --- a/llama_stack/providers/inline/inference/meta_reference/__init__.py +++ b/llama_stack/providers/inline/inference/meta_reference/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Union +from typing import Any, Dict, Union from .config import MetaReferenceInferenceConfig, MetaReferenceQuantizedInferenceConfig async def get_provider_impl( config: Union[MetaReferenceInferenceConfig, MetaReferenceQuantizedInferenceConfig], - _deps, + _deps: Dict[str, Any], ): from .inference import MetaReferenceInferenceImpl diff --git a/llama_stack/providers/inline/inference/sentence_transformers/__init__.py b/llama_stack/providers/inline/inference/sentence_transformers/__init__.py index d5710f7fd..c1d65d10c 100644 --- a/llama_stack/providers/inline/inference/sentence_transformers/__init__.py +++ b/llama_stack/providers/inline/inference/sentence_transformers/__init__.py @@ -4,6 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from typing import Any, Dict + from llama_stack.providers.inline.inference.sentence_transformers.config import ( SentenceTransformersInferenceConfig, ) @@ -11,7 +13,7 @@ from llama_stack.providers.inline.inference.sentence_transformers.config import async def get_provider_impl( config: SentenceTransformersInferenceConfig, - _deps, + _deps: Dict[str, Any], ): from .sentence_transformers import SentenceTransformersInferenceImpl diff --git a/llama_stack/providers/inline/inference/vllm/__init__.py b/llama_stack/providers/inline/inference/vllm/__init__.py index aa0c4b101..bd0551e57 100644 --- a/llama_stack/providers/inline/inference/vllm/__init__.py +++ b/llama_stack/providers/inline/inference/vllm/__init__.py @@ -4,12 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any +from typing import Any, Dict from .config import VLLMConfig -async def get_provider_impl(config: VLLMConfig, _deps) -> Any: +async def get_provider_impl(config: VLLMConfig, _deps: Dict[str, Any]): from .vllm import VLLMInferenceImpl impl = VLLMInferenceImpl(config) diff --git a/llama_stack/providers/inline/post_training/torchtune/__init__.py b/llama_stack/providers/inline/post_training/torchtune/__init__.py index 7ef8eee01..ca7801be7 100644 --- a/llama_stack/providers/inline/post_training/torchtune/__init__.py +++ b/llama_stack/providers/inline/post_training/torchtune/__init__.py @@ -4,9 +4,9 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict +from typing import Any, Dict -from llama_stack.distribution.datatypes import Api, ProviderSpec +from llama_stack.distribution.datatypes import Api from .config import TorchtunePostTrainingConfig @@ -15,7 +15,7 @@ from .config import TorchtunePostTrainingConfig async def get_provider_impl( config: TorchtunePostTrainingConfig, - deps: Dict[Api, ProviderSpec], + deps: Dict[Api, Any], ): from .post_training import TorchtunePostTrainingImpl diff --git a/llama_stack/providers/inline/post_training/torchtune/post_training.py b/llama_stack/providers/inline/post_training/torchtune/post_training.py index b837362d7..3a1affc91 100644 --- a/llama_stack/providers/inline/post_training/torchtune/post_training.py +++ b/llama_stack/providers/inline/post_training/torchtune/post_training.py @@ -43,6 +43,9 @@ class TorchtunePostTrainingImpl: self.jobs = {} self.checkpoints_dict = {} + async def shutdown(self): + pass + async def supervised_fine_tune( self, job_uuid: str, diff --git a/llama_stack/providers/inline/safety/code_scanner/__init__.py b/llama_stack/providers/inline/safety/code_scanner/__init__.py index 031130cb7..62975a963 100644 --- a/llama_stack/providers/inline/safety/code_scanner/__init__.py +++ b/llama_stack/providers/inline/safety/code_scanner/__init__.py @@ -4,10 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from typing import Any, Dict + from .config import CodeScannerConfig -async def get_provider_impl(config: CodeScannerConfig, deps): +async def get_provider_impl(config: CodeScannerConfig, deps: Dict[str, Any]): from .code_scanner import MetaReferenceCodeScannerSafetyImpl impl = MetaReferenceCodeScannerSafetyImpl(config, deps) diff --git a/llama_stack/providers/inline/safety/llama_guard/__init__.py b/llama_stack/providers/inline/safety/llama_guard/__init__.py index ee9ee31e6..a4263b169 100644 --- a/llama_stack/providers/inline/safety/llama_guard/__init__.py +++ b/llama_stack/providers/inline/safety/llama_guard/__init__.py @@ -4,10 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from typing import Any, Dict + from .config import LlamaGuardConfig -async def get_provider_impl(config: LlamaGuardConfig, deps): +async def get_provider_impl(config: LlamaGuardConfig, deps: Dict[str, Any]): from .llama_guard import LlamaGuardSafetyImpl assert isinstance(config, LlamaGuardConfig), f"Unexpected config type: {type(config)}" diff --git a/llama_stack/providers/inline/safety/prompt_guard/__init__.py b/llama_stack/providers/inline/safety/prompt_guard/__init__.py index 087aca6d9..747f34421 100644 --- a/llama_stack/providers/inline/safety/prompt_guard/__init__.py +++ b/llama_stack/providers/inline/safety/prompt_guard/__init__.py @@ -4,10 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from typing import Any, Dict + from .config import PromptGuardConfig # noqa: F401 -async def get_provider_impl(config: PromptGuardConfig, deps): +async def get_provider_impl(config: PromptGuardConfig, deps: Dict[str, Any]): from .prompt_guard import PromptGuardSafetyImpl impl = PromptGuardSafetyImpl(config, deps) diff --git a/llama_stack/providers/inline/scoring/basic/__init__.py b/llama_stack/providers/inline/scoring/basic/__init__.py index c72434e9e..4898b973a 100644 --- a/llama_stack/providers/inline/scoring/basic/__init__.py +++ b/llama_stack/providers/inline/scoring/basic/__init__.py @@ -3,16 +3,16 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict +from typing import Any, Dict -from llama_stack.distribution.datatypes import Api, ProviderSpec +from llama_stack.distribution.datatypes import Api from .config import BasicScoringConfig async def get_provider_impl( config: BasicScoringConfig, - deps: Dict[Api, ProviderSpec], + deps: Dict[Api, Any], ): from .scoring import BasicScoringImpl diff --git a/llama_stack/providers/inline/scoring/braintrust/__init__.py b/llama_stack/providers/inline/scoring/braintrust/__init__.py index 2ddc58bd2..f1b0112d9 100644 --- a/llama_stack/providers/inline/scoring/braintrust/__init__.py +++ b/llama_stack/providers/inline/scoring/braintrust/__init__.py @@ -3,11 +3,11 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict +from typing import Any, Dict from pydantic import BaseModel -from llama_stack.distribution.datatypes import Api, ProviderSpec +from llama_stack.distribution.datatypes import Api from .config import BraintrustScoringConfig @@ -18,7 +18,7 @@ class BraintrustProviderDataValidator(BaseModel): async def get_provider_impl( config: BraintrustScoringConfig, - deps: Dict[Api, ProviderSpec], + deps: Dict[Api, Any], ): from .braintrust import BraintrustScoringImpl diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py b/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py index 18535332e..4a83bfe13 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py @@ -3,16 +3,16 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict +from typing import Any, Dict -from llama_stack.distribution.datatypes import Api, ProviderSpec +from llama_stack.distribution.datatypes import Api from .config import LlmAsJudgeScoringConfig async def get_provider_impl( config: LlmAsJudgeScoringConfig, - deps: Dict[Api, ProviderSpec], + deps: Dict[Api, Any], ): from .scoring import LlmAsJudgeScoringImpl diff --git a/llama_stack/providers/inline/tool_runtime/code_interpreter/__init__.py b/llama_stack/providers/inline/tool_runtime/code_interpreter/__init__.py index 995358d46..8317ce793 100644 --- a/llama_stack/providers/inline/tool_runtime/code_interpreter/__init__.py +++ b/llama_stack/providers/inline/tool_runtime/code_interpreter/__init__.py @@ -4,12 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from typing import Any, Dict + from .config import CodeInterpreterToolConfig __all__ = ["CodeInterpreterToolConfig", "CodeInterpreterToolRuntimeImpl"] -async def get_provider_impl(config: CodeInterpreterToolConfig, _deps): +async def get_provider_impl(config: CodeInterpreterToolConfig, _deps: Dict[str, Any]): from .code_interpreter import CodeInterpreterToolRuntimeImpl impl = CodeInterpreterToolRuntimeImpl(config) diff --git a/llama_stack/providers/inline/vector_io/chroma/__init__.py b/llama_stack/providers/inline/vector_io/chroma/__init__.py index abaf01097..f39188b46 100644 --- a/llama_stack/providers/inline/vector_io/chroma/__init__.py +++ b/llama_stack/providers/inline/vector_io/chroma/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict +from typing import Any, Dict -from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack.providers.datatypes import Api from .config import ChromaVectorIOConfig -async def get_provider_impl(config: ChromaVectorIOConfig, deps: Dict[Api, ProviderSpec]): +async def get_provider_impl(config: ChromaVectorIOConfig, deps: Dict[Api, Any]): from llama_stack.providers.remote.vector_io.chroma.chroma import ( ChromaVectorIOAdapter, ) diff --git a/llama_stack/providers/inline/vector_io/faiss/__init__.py b/llama_stack/providers/inline/vector_io/faiss/__init__.py index f23e1fa4f..fc8ce70b4 100644 --- a/llama_stack/providers/inline/vector_io/faiss/__init__.py +++ b/llama_stack/providers/inline/vector_io/faiss/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict +from typing import Any, Dict -from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack.providers.datatypes import Api from .config import FaissVectorIOConfig -async def get_provider_impl(config: FaissVectorIOConfig, deps: Dict[Api, ProviderSpec]): +async def get_provider_impl(config: FaissVectorIOConfig, deps: Dict[Api, Any]): from .faiss import FaissVectorIOAdapter assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}" diff --git a/llama_stack/providers/inline/vector_io/milvus/__init__.py b/llama_stack/providers/inline/vector_io/milvus/__init__.py index bee6b2ded..d88a3b005 100644 --- a/llama_stack/providers/inline/vector_io/milvus/__init__.py +++ b/llama_stack/providers/inline/vector_io/milvus/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict +from typing import Any, Dict -from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack.providers.datatypes import Api from .config import MilvusVectorIOConfig -async def get_provider_impl(config: MilvusVectorIOConfig, deps: Dict[Api, ProviderSpec]): +async def get_provider_impl(config: MilvusVectorIOConfig, deps: Dict[Api, Any]): from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusVectorIOAdapter impl = MilvusVectorIOAdapter(config, deps[Api.inference]) diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py index 5a2f07012..2380eb0ef 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict +from typing import Any, Dict -from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack.providers.datatypes import Api from .config import SQLiteVectorIOConfig -async def get_provider_impl(config: SQLiteVectorIOConfig, deps: Dict[Api, ProviderSpec]): +async def get_provider_impl(config: SQLiteVectorIOConfig, deps: Dict[Api, Any]): from .sqlite_vec import SQLiteVecVectorIOAdapter assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}" diff --git a/llama_stack/providers/remote/inference/fireworks/models.py b/llama_stack/providers/remote/inference/fireworks/models.py index c90f632ff..a0dc11768 100644 --- a/llama_stack/providers/remote/inference/fireworks/models.py +++ b/llama_stack/providers/remote/inference/fireworks/models.py @@ -24,10 +24,6 @@ MODEL_ENTRIES = [ "accounts/fireworks/models/llama-v3p1-405b-instruct", CoreModelId.llama3_1_405b_instruct.value, ), - build_hf_repo_model_entry( - "accounts/fireworks/models/llama-v3p2-1b-instruct", - CoreModelId.llama3_2_1b_instruct.value, - ), build_hf_repo_model_entry( "accounts/fireworks/models/llama-v3p2-3b-instruct", CoreModelId.llama3_2_3b_instruct.value, diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index 237bbf0c9..350bc9bd1 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -128,16 +128,6 @@ models: provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-1B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - model_type: llm - metadata: {} model_id: accounts/fireworks/models/llama-v3p2-3b-instruct provider_id: fireworks diff --git a/llama_stack/templates/dev/run.yaml b/llama_stack/templates/dev/run.yaml index 8346c0a43..fb5c7126e 100644 --- a/llama_stack/templates/dev/run.yaml +++ b/llama_stack/templates/dev/run.yaml @@ -186,16 +186,6 @@ models: provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-1B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - model_type: llm - metadata: {} model_id: accounts/fireworks/models/llama-v3p2-3b-instruct provider_id: fireworks diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index df4b9126c..071375788 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -140,16 +140,6 @@ models: provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-1B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - model_type: llm - metadata: {} model_id: accounts/fireworks/models/llama-v3p2-3b-instruct provider_id: fireworks diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index d5429c27f..adc9c7344 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -134,16 +134,6 @@ models: provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm -- metadata: {} - model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - model_type: llm -- metadata: {} - model_id: meta-llama/Llama-3.2-1B-Instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct - model_type: llm - metadata: {} model_id: accounts/fireworks/models/llama-v3p2-3b-instruct provider_id: fireworks