Merge branch 'refs/heads/main' into preprocessors

ilya-kolchinsky 2025-03-11 20:05:52 +01:00
commit d38aea33c1
37 changed files with 493 additions and 255 deletions

View file

@@ -14,16 +14,16 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: '3.10.16'
+          python-version: '3.10'
       - uses: astral-sh/setup-uv@v5
         with:
-          python-version: '3.10.16'
+          python-version: '3.10'
           enable-cache: false
       - name: Run unit tests
         run: |
-          uv run -p 3.10.16 --with-editable . --with-editable ".[dev]" --with-editable ".[unit]" pytest --cov=llama_stack -s -v tests/unit/ --junitxml=pytest-report.xml
+          uv run -p 3.10 --with-editable . --with-editable ".[dev]" --with-editable ".[unit]" pytest --cov=llama_stack -s -v tests/unit/ --junitxml=pytest-report.xml
       - name: Upload test results
         if: always()

View file

@@ -159,8 +159,7 @@ uv run sphinx-autobuild source build/html --write-all

 If you modify or add new API endpoints, update the API documentation accordingly. You can do this by running the following command:

 ```bash
-uv sync --extra dev
-uv run ./docs/openapi_generator/run_openapi_generator.sh
+uv run --with ".[dev]" ./docs/openapi_generator/run_openapi_generator.sh
 ```

 The generated API documentation will be available in `docs/_static/`. Make sure to review the changes before committing.

View file

@@ -363,6 +363,37 @@
       }
     },
     "/v1/agents": {
+      "get": {
+        "responses": {
+          "200": {
+            "description": "A ListAgentsResponse.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ListAgentsResponse"
+                }
+              }
+            }
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest400"
+          },
+          "429": {
+            "$ref": "#/components/responses/TooManyRequests429"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError500"
+          },
+          "default": {
+            "$ref": "#/components/responses/DefaultError"
+          }
+        },
+        "tags": [
+          "Agents"
+        ],
+        "description": "List all agents.",
+        "parameters": []
+      },
       "post": {
         "responses": {
           "200": {
@@ -609,6 +640,47 @@
       }
     },
     "/v1/agents/{agent_id}": {
+      "get": {
+        "responses": {
+          "200": {
+            "description": "An Agent of the agent.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/Agent"
+                }
+              }
+            }
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest400"
+          },
+          "429": {
+            "$ref": "#/components/responses/TooManyRequests429"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError500"
+          },
+          "default": {
+            "$ref": "#/components/responses/DefaultError"
+          }
+        },
+        "tags": [
+          "Agents"
+        ],
+        "description": "Describe an agent by its ID.",
+        "parameters": [
+          {
+            "name": "agent_id",
+            "in": "path",
+            "description": "ID of the agent.",
+            "required": true,
+            "schema": {
+              "type": "string"
+            }
+          }
+        ]
+      },
       "delete": {
         "responses": {
           "200": {
@@ -2358,6 +2430,49 @@
         ]
       }
     },
+    "/v1/agents/{agent_id}/sessions": {
+      "get": {
+        "responses": {
+          "200": {
+            "description": "A ListAgentSessionsResponse.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ListAgentSessionsResponse"
+                }
+              }
+            }
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest400"
+          },
+          "429": {
+            "$ref": "#/components/responses/TooManyRequests429"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError500"
+          },
+          "default": {
+            "$ref": "#/components/responses/DefaultError"
+          }
+        },
+        "tags": [
+          "Agents"
+        ],
+        "description": "List all session(s) of a given agent.",
+        "parameters": [
+          {
+            "name": "agent_id",
+            "in": "path",
+            "description": "The ID of the agent to list sessions for.",
+            "required": true,
+            "schema": {
+              "type": "string"
+            }
+          }
+        ]
+      }
+    },
     "/v1/eval/benchmarks": {
       "get": {
         "responses": {
@@ -6776,6 +6891,28 @@
        "title": "ScoringResult",
        "description": "A scoring result for a single row."
      },
+      "Agent": {
+        "type": "object",
+        "properties": {
+          "agent_id": {
+            "type": "string"
+          },
+          "agent_config": {
+            "$ref": "#/components/schemas/AgentConfig"
+          },
+          "created_at": {
+            "type": "string",
+            "format": "date-time"
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "agent_id",
+          "agent_config",
+          "created_at"
+        ],
+        "title": "Agent"
+      },
      "Session": {
        "type": "object",
        "properties": {
@@ -8214,6 +8351,38 @@
        ],
        "title": "ToolInvocationResult"
      },
+      "ListAgentSessionsResponse": {
+        "type": "object",
+        "properties": {
+          "data": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/Session"
+            }
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "data"
+        ],
+        "title": "ListAgentSessionsResponse"
+      },
+      "ListAgentsResponse": {
+        "type": "object",
+        "properties": {
+          "data": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/Agent"
+            }
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "data"
+        ],
+        "title": "ListAgentsResponse"
+      },
      "BucketResponse": {
        "type": "object",
        "properties": {

View file

@@ -238,6 +238,28 @@ paths:
             $ref: '#/components/schemas/CompletionRequest'
         required: true
   /v1/agents:
+    get:
+      responses:
+        '200':
+          description: A ListAgentsResponse.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListAgentsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Agents
+      description: List all agents.
+      parameters: []
     post:
       responses:
         '200':
@@ -410,6 +432,34 @@
             $ref: '#/components/schemas/CreateUploadSessionRequest'
         required: true
   /v1/agents/{agent_id}:
+    get:
+      responses:
+        '200':
+          description: An Agent of the agent.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Agent'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Agents
+      description: Describe an agent by its ID.
+      parameters:
+        - name: agent_id
+          in: path
+          description: ID of the agent.
+          required: true
+          schema:
+            type: string
     delete:
       responses:
         '200':
@@ -1581,6 +1631,36 @@ paths:
         required: true
         schema:
           type: string
+  /v1/agents/{agent_id}/sessions:
+    get:
+      responses:
+        '200':
+          description: A ListAgentSessionsResponse.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListAgentSessionsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Agents
+      description: List all session(s) of a given agent.
+      parameters:
+        - name: agent_id
+          in: path
+          description: >-
+            The ID of the agent to list sessions for.
+          required: true
+          schema:
+            type: string
   /v1/eval/benchmarks:
     get:
       responses:
@@ -4690,6 +4770,22 @@ components:
        - aggregated_results
      title: ScoringResult
      description: A scoring result for a single row.
+    Agent:
+      type: object
+      properties:
+        agent_id:
+          type: string
+        agent_config:
+          $ref: '#/components/schemas/AgentConfig'
+        created_at:
+          type: string
+          format: date-time
+      additionalProperties: false
+      required:
+        - agent_id
+        - agent_config
+        - created_at
+      title: Agent
    Session:
      type: object
      properties:
@@ -5579,6 +5675,28 @@ components:
      required:
        - content
      title: ToolInvocationResult
+    ListAgentSessionsResponse:
+      type: object
+      properties:
+        data:
+          type: array
+          items:
+            $ref: '#/components/schemas/Session'
+      additionalProperties: false
+      required:
+        - data
+      title: ListAgentSessionsResponse
+    ListAgentsResponse:
+      type: object
+      properties:
+        data:
+          type: array
+          items:
+            $ref: '#/components/schemas/Agent'
+      additionalProperties: false
+      required:
+        - data
+      title: ListAgentsResponse
    BucketResponse:
      type: object
      properties:

View file

@@ -1,9 +1 @@
 The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack/distribution/server/endpoints.py` using the `generate.py` utility.
-
-Please install the following packages before running the script:
-
-```
-pip install fire PyYAML
-```
-
-Then simply run `sh run_openapi_generator.sh`

View file

@@ -33,6 +33,8 @@ Can be set to any of the following log levels:

 The default global log level is `info`. `all` sets the log level for all components.

+A user can also set `LLAMA_STACK_LOG_FILE` which will pipe the logs to the specified path as well as to the terminal. An example would be: `export LLAMA_STACK_LOG_FILE=server.log`
+
 ### Llama Stack Build

 In order to build your own distribution, we recommend you clone the `llama-stack` repository.

View file

@@ -41,7 +41,6 @@ The following models are available by default:

 - `accounts/fireworks/models/llama-v3p1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
 - `accounts/fireworks/models/llama-v3p1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
 - `accounts/fireworks/models/llama-v3p1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
-- `accounts/fireworks/models/llama-v3p2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
 - `accounts/fireworks/models/llama-v3p2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
 - `accounts/fireworks/models/llama-v3p2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
 - `accounts/fireworks/models/llama-v3p2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`

View file

@@ -1,6 +1,6 @@
 # llama (server-side) CLI Reference

-The `llama` CLI tool helps you setup and use the Llama Stack. It should be available on your path after installing the `llama-stack` package.
+The `llama` CLI tool helps you set up and use the Llama Stack. The CLI is available on your path after installing the `llama-stack` package.

 ## Installation

@@ -27,9 +27,9 @@ You have two ways to install Llama Stack:

 ## `llama` subcommands

-1. `download`: `llama` cli tools supports downloading the model from Meta or Hugging Face.
-2. `model`: Lists available models and their properties.
-3. `stack`: Allows you to build and run a Llama Stack server. You can read more about this [here](../../distributions/building_distro).
+1. `download`: Supports downloading models from Meta or Hugging Face. [Downloading models](#downloading-models)
+2. `model`: Lists available models and their properties. [Understanding models](#understand-the-models)
+3. `stack`: Allows you to build a stack using the `llama stack` distribution and run a Llama Stack server. You can read more about how to build a Llama Stack distribution in the [Build your own Distribution](../../distributions/building_distro) documentation.

 ### Sample Usage

@@ -117,7 +117,7 @@ You should see a table like this:
 +----------------------------------+------------------------------------------+----------------+
 ```

-To download models, you can use the llama download command.
+To download models, you can use the `llama download` command.

 ### Downloading from [Meta](https://llama.meta.com/llama-downloads/)

@@ -191,7 +191,7 @@ You should see a table like this:
 The `llama model` command helps you explore the models interface.

 1. `download`: Download the model from different sources. (meta, huggingface)
-2. `list`: Lists all the models available for download with hardware requirements to deploy the models.
+2. `list`: Lists all the models available for download with hardware requirements for deploying the models.
 3. `prompt-format`: Show llama model message formats.
 4. `describe`: Describes all the properties of the model.

@@ -262,13 +262,12 @@ llama model prompt-format -m Llama3.2-3B-Instruct

 ![alt text](../../../resources/prompt-format.png)

 You will be shown a Markdown formatted description of the model interface and how prompts / messages are formatted for various scenarios.
-
 **NOTE**: Outputs in terminal are color printed to show special tokens.

 ### Remove model

-You can run `llama model remove` to remove unecessary model:
+You can run `llama model remove` to remove an unnecessary model:

 ```
 llama model remove -m Llama-Guard-3-8B-int8

View file

@@ -234,6 +234,23 @@ class AgentConfig(AgentConfigCommon):
     response_format: Optional[ResponseFormat] = None


+@json_schema_type
+class Agent(BaseModel):
+    agent_id: str
+    agent_config: AgentConfig
+    created_at: datetime
+
+
+@json_schema_type
+class ListAgentsResponse(BaseModel):
+    data: List[Agent]
+
+
+@json_schema_type
+class ListAgentSessionsResponse(BaseModel):
+    data: List[Session]
+
+
 class AgentConfigOverridablePerTurn(AgentConfigCommon):
     instructions: Optional[str] = None

@@ -541,3 +558,32 @@ class Agents(Protocol):
         :param agent_id: The ID of the agent to delete.
         """
         ...
+
+    @webmethod(route="/agents", method="GET")
+    async def list_agents(self) -> ListAgentsResponse:
+        """List all agents.
+
+        :returns: A ListAgentsResponse.
+        """
+        ...
+
+    @webmethod(route="/agents/{agent_id}", method="GET")
+    async def get_agent(self, agent_id: str) -> Agent:
+        """Describe an agent by its ID.
+
+        :param agent_id: ID of the agent.
+        :returns: An Agent of the agent.
+        """
+        ...
+
+    @webmethod(route="/agents/{agent_id}/sessions", method="GET")
+    async def list_agent_sessions(
+        self,
+        agent_id: str,
+    ) -> ListAgentSessionsResponse:
+        """List all session(s) of a given agent.
+
+        :param agent_id: The ID of the agent to list sessions for.
+        :returns: A ListAgentSessionsResponse.
+        """
+        ...
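The new response types above are ordinary Pydantic models, so they can be built and serialized directly. A minimal sketch, not part of the commit, which assumes `model` and `instructions` are the required `AgentConfig` fields (other fields take their defaults):

```python
# Sketch only: round-trip the new response models by hand.
from datetime import datetime

from llama_stack.apis.agents import Agent, AgentConfig, ListAgentsResponse

# Assumption: model/instructions are AgentConfig's required fields here.
config = AgentConfig(
    model="meta-llama/Llama-3.1-8B-Instruct",
    instructions="You are a helpful assistant.",
)
agent = Agent(agent_id="agent-123", agent_config=config, created_at=datetime.now())
print(ListAgentsResponse(data=[agent]).model_dump_json())
```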

View file

@@ -56,6 +56,7 @@ class StackRun(Subcommand):
             "--env",
             action="append",
             help="Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times.",
+            default=[],
             metavar="KEY=VALUE",
         )
         self.parser.add_argument(
@@ -73,6 +74,7 @@ class StackRun(Subcommand):
             type=str,
             help="Image Type used during the build. This can be either conda or container or venv.",
             choices=["conda", "container", "venv"],
+            default="conda",
         )

     def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
@@ -118,42 +120,20 @@ class StackRun(Subcommand):
         except AttributeError as e:
             self.parser.error(f"failed to parse config file '{config_file}':\n {e}")

-        # If neither image type nor image name is provided, assume the server should be run directly
-        # using the current environment packages.
-        if not args.image_type and not args.image_name:
-            logger.info("No image type or image name provided. Assuming environment packages.")
-            from llama_stack.distribution.server.server import main as server_main
-
-            # Build the server args from the current args passed to the CLI
-            server_args = argparse.Namespace()
-            for arg in vars(args):
-                # If this is a function, avoid passing it
-                # "args" contains:
-                # func=<bound method StackRun._run_stack_run_cmd of <llama_stack.cli.stack.run.StackRun object at 0x10484b010>>
-                if callable(getattr(args, arg)):
-                    continue
-                setattr(server_args, arg, getattr(args, arg))
-
-            # Run the server
-            server_main(server_args)
-        else:
-            run_args = formulate_run_args(args.image_type, args.image_name, config, template_name)
-
-            run_args.extend([str(config_file), str(args.port)])
-            if args.disable_ipv6:
-                run_args.append("--disable-ipv6")
-
-            if args.env:
-                for env_var in args.env:
-                    if "=" not in env_var:
-                        self.parser.error(f"Environment variable '{env_var}' must be in KEY=VALUE format")
-                        return
-                    key, value = env_var.split("=", 1)  # split on first = only
-                    if not key:
-                        self.parser.error(f"Environment variable '{env_var}' has empty key")
-                        return
-                    run_args.extend(["--env", f"{key}={value}"])
-
-            if args.tls_keyfile and args.tls_certfile:
-                run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile])
-            run_with_pty(run_args)
+        run_args = formulate_run_args(args.image_type, args.image_name, config, template_name)
+
+        run_args.extend([str(config_file), str(args.port)])
+        if args.disable_ipv6:
+            run_args.append("--disable-ipv6")
+
+        for env_var in args.env:
+            if "=" not in env_var:
+                self.parser.error(f"Environment variable '{env_var}' must be in KEY=VALUE format")
+            key, value = env_var.split("=", 1)  # split on first = only
+            if not key:
+                self.parser.error(f"Environment variable '{env_var}' has empty key")
+            run_args.extend(["--env", f"{key}={value}"])

+        if args.tls_keyfile and args.tls_certfile:
+            run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile])
+        run_with_pty(run_args)
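One subtlety in the validation above: `split("=", 1)` splits on the first `=` only, so values that themselves contain `=` survive intact. A standalone illustration (not from the commit):

```python
# split("=", 1) keeps any later '=' characters inside the value.
env_var = "EXTRA_OPTS=retries=3,timeout=10"
key, value = env_var.split("=", 1)
assert key == "EXTRA_OPTS"
assert value == "retries=3,timeout=10"  # the later '=' signs are preserved
```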

View file

@@ -6,18 +6,16 @@

 import argparse
 import asyncio
-import functools
 import inspect
 import json
 import os
-import signal
 import sys
 import traceback
 import warnings
 from contextlib import asynccontextmanager
 from importlib.metadata import version as parse_version
 from pathlib import Path
-from typing import Any, List, Optional, Union
+from typing import Any, List, Union

 import yaml
 from fastapi import Body, FastAPI, HTTPException, Request
@@ -118,69 +116,24 @@ def translate_exception(exc: Exception) -> Union[HTTPException, RequestValidationError]:
     )


-def handle_signal(app, signum, _) -> None:
-    """
-    Handle incoming signals and initiate a graceful shutdown of the application.
-
-    This function is intended to be used as a signal handler for various signals
-    (e.g., SIGINT, SIGTERM). Upon receiving a signal, it will print a message
-    indicating the received signal and initiate a shutdown process.
-
-    Args:
-        app: The application instance containing implementations to be shut down.
-        signum (int): The signal number received.
-        frame: The current stack frame (not used in this function).
-
-    The shutdown process involves:
-    - Shutting down all implementations registered in the application.
-    - Gathering all running asyncio tasks.
-    - Cancelling all gathered tasks.
-    - Waiting for all tasks to finish.
-    - Stopping the event loop.
-
-    Note:
-        This function schedules the shutdown process as an asyncio task and does
-        not block the current execution.
-    """
-    signame = signal.Signals(signum).name
-    logger.info(f"Received signal {signame} ({signum}). Exiting gracefully...")
-
-    async def shutdown():
-        try:
-            # Gracefully shut down implementations
-            for impl in app.__llama_stack_impls__.values():
-                impl_name = impl.__class__.__name__
-                logger.info("Shutting down %s", impl_name)
-                try:
-                    if hasattr(impl, "shutdown"):
-                        await asyncio.wait_for(impl.shutdown(), timeout=5)
-                    else:
-                        logger.warning("No shutdown method for %s", impl_name)
-                except asyncio.TimeoutError:
-                    logger.exception("Shutdown timeout for %s ", impl_name, exc_info=True)
-                except Exception as e:
-                    logger.exception("Failed to shutdown %s: %s", impl_name, {e})
-
-            # Gather all running tasks
-            loop = asyncio.get_running_loop()
-            tasks = [task for task in asyncio.all_tasks(loop) if task is not asyncio.current_task()]
-
-            # Cancel all tasks
-            for task in tasks:
-                task.cancel()
-
-            # Wait for all tasks to finish
-            try:
-                await asyncio.wait_for(asyncio.gather(*tasks, return_exceptions=True), timeout=10)
-            except asyncio.TimeoutError:
-                logger.exception("Timeout while waiting for tasks to finish")
-        except asyncio.CancelledError:
-            pass
-        finally:
-            loop.stop()
-
-    loop = asyncio.get_running_loop()
-    loop.create_task(shutdown())
+async def shutdown(app):
+    """Initiate a graceful shutdown of the application.
+
+    Handled by the lifespan context manager. The shutdown process involves
+    shutting down all implementations registered in the application.
+    """
+    for impl in app.__llama_stack_impls__.values():
+        impl_name = impl.__class__.__name__
+        logger.info("Shutting down %s", impl_name)
+        try:
+            if hasattr(impl, "shutdown"):
+                await asyncio.wait_for(impl.shutdown(), timeout=5)
+            else:
+                logger.warning("No shutdown method for %s", impl_name)
+        except asyncio.TimeoutError:
+            logger.exception("Shutdown timeout for %s ", impl_name, exc_info=True)
+        except (Exception, asyncio.CancelledError) as e:
+            logger.exception("Failed to shutdown %s: %s", impl_name, {e})


@@ -188,8 +141,7 @@ async def lifespan(app: FastAPI):
     logger.info("Starting up")

     yield

     logger.info("Shutting down")
-    for impl in app.__llama_stack_impls__.values():
-        await impl.shutdown()
+    await shutdown(app)


 def is_streaming_request(func_name: str, request: Request, **kwargs):
@@ -266,7 +218,7 @@ class TracingMiddleware:
         self.app = app

     async def __call__(self, scope, receive, send):
-        path = scope["path"]
+        path = scope.get("path", "")
         await start_trace(path, {"__location__": "server"})
         try:
             return await self.app(scope, receive, send)
@@ -314,17 +266,11 @@ class ClientVersionMiddleware:
         return await self.app(scope, receive, send)


-def main(args: Optional[argparse.Namespace] = None):
+def main():
     """Start the LlamaStack server."""
     parser = argparse.ArgumentParser(description="Start the LlamaStack server.")
     parser.add_argument(
         "--yaml-config",
-        dest="config",
-        help="(Deprecated) Path to YAML configuration file - use --config instead",
-    )
-    parser.add_argument(
-        "--config",
-        dest="config",
         help="Path to YAML configuration file",
     )
     parser.add_argument(
@@ -354,19 +300,7 @@ def main(args: Optional[argparse.Namespace] = None):
         required="--tls-keyfile" in sys.argv,
     )

-    # Determine whether the server args are being passed by the "run" command, if this is the case
-    # the args will be passed as a Namespace object to the main function, otherwise they will be
-    # parsed from the command line
-    if args is None:
-        args = parser.parse_args()
-
-    # Check for deprecated argument usage
-    if "--yaml-config" in sys.argv:
-        warnings.warn(
-            "The '--yaml-config' argument is deprecated and will be removed in a future version. Use '--config' instead.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
+    args = parser.parse_args()

     if args.env:
         for env_pair in args.env:
@@ -378,9 +312,9 @@ def main(args: Optional[argparse.Namespace] = None):
             logger.error(f"Error: {str(e)}")
             sys.exit(1)

-    if args.config:
+    if args.yaml_config:
         # if the user provided a config file, use it, even if template was specified
-        config_file = Path(args.config)
+        config_file = Path(args.yaml_config)
         if not config_file.exists():
             raise ValueError(f"Config file {config_file} does not exist")
         logger.info(f"Using config file: {config_file}")
@@ -457,8 +391,6 @@ def main(args: Optional[argparse.Namespace] = None):
     app.exception_handler(RequestValidationError)(global_exception_handler)
     app.exception_handler(Exception)(global_exception_handler)

-    signal.signal(signal.SIGINT, functools.partial(handle_signal, app))
-    signal.signal(signal.SIGTERM, functools.partial(handle_signal, app))
-
     app.__llama_stack_impls__ = impls

@@ -489,6 +421,7 @@ def main(args: Optional[argparse.Namespace] = None):
         "app": app,
         "host": listen_host,
         "port": port,
+        "lifespan": "on",
     }
     if ssl_config:
         uvicorn_config.update(ssl_config)
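For readers unfamiliar with the pattern this file now relies on: uvicorn drives the FastAPI lifespan context manager on startup and shutdown, so the cleanup that used to live in OS signal handlers now runs after `yield`. A standalone sketch of the pattern (illustrative, not this server's code):

```python
# Standalone sketch of lifespan-based shutdown (not the server's actual code).
from contextlib import asynccontextmanager

import uvicorn
from fastapi import FastAPI


@asynccontextmanager
async def lifespan(app: FastAPI):
    print("starting up")
    yield  # the application serves requests while suspended here
    print("shutting down")  # runs on SIGINT/SIGTERM, driven by uvicorn


app = FastAPI(lifespan=lifespan)

if __name__ == "__main__":
    # lifespan="on" makes uvicorn treat lifespan errors as fatal
    # instead of silently disabling the lifespan protocol.
    uvicorn.run(app, host="127.0.0.1", port=8000, lifespan="on")
```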

View file

@@ -97,12 +97,13 @@ class CustomRichHandler(RichHandler):
             self.markup = original_markup


-def setup_logging(category_levels: Dict[str, int]) -> None:
+def setup_logging(category_levels: Dict[str, int], log_file: str | None) -> None:
     """
-    Configure logging based on the provided category log levels.
+    Configure logging based on the provided category log levels and an optional log file.

     Parameters:
         category_levels (Dict[str, int]): A dictionary mapping categories to their log levels.
+        log_file (str): Path to a log file to additionally pipe the logs into
     """
     log_format = "[dim]%(asctime)s %(name)s:%(lineno)d[/] [yellow dim]%(category)s[/]: %(message)s"

@@ -117,6 +118,28 @@
     # Determine the root logger's level (default to WARNING if not specified)
     root_level = category_levels.get("root", logging.WARNING)

+    handlers = {
+        "console": {
+            "()": CustomRichHandler,  # Use custom console handler
+            "formatter": "rich",
+            "rich_tracebacks": True,
+            "show_time": False,
+            "show_path": False,
+            "markup": True,
+            "filters": ["category_filter"],
+        }
+    }
+
+    # Add a file handler if log_file is set
+    if log_file:
+        handlers["file"] = {
+            "class": "logging.FileHandler",
+            "formatter": "rich",
+            "filename": log_file,
+            "mode": "a",
+            "encoding": "utf-8",
+        }
+
     logging_config = {
         "version": 1,
         "disable_existing_loggers": False,
@@ -126,17 +149,7 @@
                 "format": log_format,
             }
         },
-        "handlers": {
-            "console": {
-                "()": CustomRichHandler,  # Use our custom handler class
-                "formatter": "rich",
-                "rich_tracebacks": True,
-                "show_time": False,
-                "show_path": False,
-                "markup": True,
-                "filters": ["category_filter"],
-            }
-        },
+        "handlers": handlers,
         "filters": {
             "category_filter": {
                 "()": CategoryFilter,
@@ -144,14 +157,14 @@
             },
         },
         "loggers": {
             category: {
-                "handlers": ["console"],
+                "handlers": list(handlers.keys()),  # Apply all handlers
                 "level": category_levels.get(category, DEFAULT_LOG_LEVEL),
                 "propagate": False,  # Disable propagation to root logger
             }
             for category in CATEGORIES
         },
         "root": {
-            "handlers": ["console"],
+            "handlers": list(handlers.keys()),
             "level": root_level,  # Set root logger's level dynamically
         },
     }
@@ -180,4 +193,6 @@ if env_config:
     cprint(f"Environment variable LLAMA_STACK_LOGGING found: {env_config}", "yellow")
     _category_levels.update(parse_environment_config(env_config))

-setup_logging(_category_levels)
+log_file = os.environ.get("LLAMA_STACK_LOG_FILE")
+setup_logging(_category_levels, log_file)
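The handler-dict approach above generalizes well: build the handlers mapping first, append optional handlers conditionally, then hand everything to `logging.config.dictConfig` and point every logger at `list(handlers.keys())`. A self-contained sketch of the same pattern (handler names and levels are illustrative):

```python
# Self-contained sketch of the conditional file-handler pattern used above.
import logging
import logging.config
import os

handlers = {
    "console": {"class": "logging.StreamHandler", "level": "INFO"},
}

# Mirrors LLAMA_STACK_LOG_FILE: only add the file handler when the env var is set.
log_file = os.environ.get("LLAMA_STACK_LOG_FILE")
if log_file:
    handlers["file"] = {
        "class": "logging.FileHandler",
        "filename": log_file,
        "mode": "a",
        "encoding": "utf-8",
    }

logging.config.dictConfig({
    "version": 1,
    "handlers": handlers,
    # Every configured handler receives every record routed to the root logger.
    "root": {"handlers": list(handlers.keys()), "level": "INFO"},
})
logging.getLogger(__name__).info("logged to console%s", " and file" if log_file else "")
```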

View file

@@ -4,14 +4,14 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Dict
+from typing import Any, Dict

-from llama_stack.distribution.datatypes import Api, ProviderSpec
+from llama_stack.distribution.datatypes import Api

 from .config import MetaReferenceAgentsImplConfig


-async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: Dict[Api, ProviderSpec]):
+async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: Dict[Api, Any]):
     from .agents import MetaReferenceAgentsImpl

     impl = MetaReferenceAgentsImpl(
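The same `Dict[Api, ProviderSpec]` to `Dict[Api, Any]` change repeats across the provider entry points below. The `deps` mapping actually carries live implementation objects keyed by `Api` (for example, `deps[Api.inference]` is handed straight to an adapter constructor later in this commit), which `Any` reflects better than `ProviderSpec`. A hypothetical entry point, for illustration only:

```python
# Hypothetical provider entry point showing how deps is consumed.
from typing import Any, Dict

from llama_stack.distribution.datatypes import Api


async def get_provider_impl(config: Any, deps: Dict[Api, Any]):
    # deps maps Api values to live implementation objects, not ProviderSpecs.
    inference = deps[Api.inference]  # e.g., passed into an adapter's constructor
    ...
```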

View file

@@ -12,6 +12,7 @@ import uuid
 from typing import AsyncGenerator, List, Optional, Union

 from llama_stack.apis.agents import (
+    Agent,
     AgentConfig,
     AgentCreateResponse,
     Agents,
@@ -21,6 +22,8 @@ from llama_stack.apis.agents import (
     AgentTurnCreateRequest,
     AgentTurnResumeRequest,
     Document,
+    ListAgentSessionsResponse,
+    ListAgentsResponse,
     Session,
     Turn,
 )
@@ -84,7 +87,7 @@ class MetaReferenceAgentsImpl(Agents):
             agent_id=agent_id,
         )

-    async def get_agent(self, agent_id: str) -> ChatAgent:
+    async def _get_agent_impl(self, agent_id: str) -> ChatAgent:
         agent_config = await self.persistence_store.get(
             key=f"agent:{agent_id}",
         )
@@ -120,7 +123,7 @@ class MetaReferenceAgentsImpl(Agents):
         agent_id: str,
         session_name: str,
     ) -> AgentSessionCreateResponse:
-        agent = await self.get_agent(agent_id)
+        agent = await self._get_agent_impl(agent_id)

         session_id = await agent.create_session(session_name)
         return AgentSessionCreateResponse(
@@ -160,7 +163,7 @@ class MetaReferenceAgentsImpl(Agents):
         self,
         request: AgentTurnCreateRequest,
     ) -> AsyncGenerator:
-        agent = await self.get_agent(request.agent_id)
+        agent = await self._get_agent_impl(request.agent_id)
         async for event in agent.create_and_execute_turn(request):
             yield event
@@ -188,12 +191,12 @@ class MetaReferenceAgentsImpl(Agents):
         self,
         request: AgentTurnResumeRequest,
     ) -> AsyncGenerator:
-        agent = await self.get_agent(request.agent_id)
+        agent = await self._get_agent_impl(request.agent_id)
         async for event in agent.resume_turn(request):
             yield event

     async def get_agents_turn(self, agent_id: str, session_id: str, turn_id: str) -> Turn:
-        agent = await self.get_agent(agent_id)
+        agent = await self._get_agent_impl(agent_id)
         turn = await agent.storage.get_session_turn(session_id, turn_id)
         return turn
@@ -210,7 +213,7 @@ class MetaReferenceAgentsImpl(Agents):
         session_id: str,
         turn_ids: Optional[List[str]] = None,
     ) -> Session:
-        agent = await self.get_agent(agent_id)
+        agent = await self._get_agent_impl(agent_id)
         session_info = await agent.storage.get_session_info(session_id)
         if session_info is None:
             raise ValueError(f"Session {session_id} not found")
@@ -232,3 +235,15 @@ class MetaReferenceAgentsImpl(Agents):

     async def shutdown(self) -> None:
         pass
+
+    async def list_agents(self) -> ListAgentsResponse:
+        pass
+
+    async def get_agent(self, agent_id: str) -> Agent:
+        pass
+
+    async def list_agent_sessions(
+        self,
+        agent_id: str,
+    ) -> ListAgentSessionsResponse:
+        pass

View file

@@ -4,12 +4,14 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from typing import Any, Dict
+
 from .config import LocalFSDatasetIOConfig


 async def get_provider_impl(
     config: LocalFSDatasetIOConfig,
-    _deps,
+    _deps: Dict[str, Any],
 ):
     from .datasetio import LocalFSDatasetIOImpl

View file

@@ -3,16 +3,16 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Dict
+from typing import Any, Dict

-from llama_stack.distribution.datatypes import Api, ProviderSpec
+from llama_stack.distribution.datatypes import Api

 from .config import MetaReferenceEvalConfig


 async def get_provider_impl(
     config: MetaReferenceEvalConfig,
-    deps: Dict[Api, ProviderSpec],
+    deps: Dict[Api, Any],
 ):
     from .eval import MetaReferenceEvalImpl

View file

@@ -4,14 +4,14 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Union
+from typing import Any, Dict, Union

 from .config import MetaReferenceInferenceConfig, MetaReferenceQuantizedInferenceConfig


 async def get_provider_impl(
     config: Union[MetaReferenceInferenceConfig, MetaReferenceQuantizedInferenceConfig],
-    _deps,
+    _deps: Dict[str, Any],
 ):
     from .inference import MetaReferenceInferenceImpl

View file

@@ -4,6 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from typing import Any, Dict
+
 from llama_stack.providers.inline.inference.sentence_transformers.config import (
     SentenceTransformersInferenceConfig,
 )
@@ -11,7 +13,7 @@ from llama_stack.providers.inline.inference.sentence_transformers.config import

 async def get_provider_impl(
     config: SentenceTransformersInferenceConfig,
-    _deps,
+    _deps: Dict[str, Any],
 ):
     from .sentence_transformers import SentenceTransformersInferenceImpl

View file

@@ -4,12 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Any
+from typing import Any, Dict

 from .config import VLLMConfig


-async def get_provider_impl(config: VLLMConfig, _deps) -> Any:
+async def get_provider_impl(config: VLLMConfig, _deps: Dict[str, Any]):
     from .vllm import VLLMInferenceImpl

     impl = VLLMInferenceImpl(config)

View file

@@ -4,9 +4,9 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Dict
+from typing import Any, Dict

-from llama_stack.distribution.datatypes import Api, ProviderSpec
+from llama_stack.distribution.datatypes import Api

 from .config import TorchtunePostTrainingConfig

@@ -15,7 +15,7 @@ from .config import TorchtunePostTrainingConfig

 async def get_provider_impl(
     config: TorchtunePostTrainingConfig,
-    deps: Dict[Api, ProviderSpec],
+    deps: Dict[Api, Any],
 ):
     from .post_training import TorchtunePostTrainingImpl

View file

@@ -43,6 +43,9 @@ class TorchtunePostTrainingImpl:
         self.jobs = {}
         self.checkpoints_dict = {}

+    async def shutdown(self):
+        pass
+
     async def supervised_fine_tune(
         self,
         job_uuid: str,

View file

@@ -4,10 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from typing import Any, Dict
+
 from .config import CodeScannerConfig


-async def get_provider_impl(config: CodeScannerConfig, deps):
+async def get_provider_impl(config: CodeScannerConfig, deps: Dict[str, Any]):
     from .code_scanner import MetaReferenceCodeScannerSafetyImpl

     impl = MetaReferenceCodeScannerSafetyImpl(config, deps)

View file

@@ -4,10 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from typing import Any, Dict
+
 from .config import LlamaGuardConfig


-async def get_provider_impl(config: LlamaGuardConfig, deps):
+async def get_provider_impl(config: LlamaGuardConfig, deps: Dict[str, Any]):
     from .llama_guard import LlamaGuardSafetyImpl

     assert isinstance(config, LlamaGuardConfig), f"Unexpected config type: {type(config)}"

View file

@@ -4,10 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from typing import Any, Dict
+
 from .config import PromptGuardConfig  # noqa: F401


-async def get_provider_impl(config: PromptGuardConfig, deps):
+async def get_provider_impl(config: PromptGuardConfig, deps: Dict[str, Any]):
     from .prompt_guard import PromptGuardSafetyImpl

     impl = PromptGuardSafetyImpl(config, deps)

View file

@@ -3,16 +3,16 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Dict
+from typing import Any, Dict

-from llama_stack.distribution.datatypes import Api, ProviderSpec
+from llama_stack.distribution.datatypes import Api

 from .config import BasicScoringConfig


 async def get_provider_impl(
     config: BasicScoringConfig,
-    deps: Dict[Api, ProviderSpec],
+    deps: Dict[Api, Any],
 ):
     from .scoring import BasicScoringImpl

View file

@@ -3,11 +3,11 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Dict
+from typing import Any, Dict

 from pydantic import BaseModel

-from llama_stack.distribution.datatypes import Api, ProviderSpec
+from llama_stack.distribution.datatypes import Api

 from .config import BraintrustScoringConfig

@@ -18,7 +18,7 @@ class BraintrustProviderDataValidator(BaseModel):

 async def get_provider_impl(
     config: BraintrustScoringConfig,
-    deps: Dict[Api, ProviderSpec],
+    deps: Dict[Api, Any],
 ):
     from .braintrust import BraintrustScoringImpl

View file

@@ -3,16 +3,16 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Dict
+from typing import Any, Dict

-from llama_stack.distribution.datatypes import Api, ProviderSpec
+from llama_stack.distribution.datatypes import Api

 from .config import LlmAsJudgeScoringConfig


 async def get_provider_impl(
     config: LlmAsJudgeScoringConfig,
-    deps: Dict[Api, ProviderSpec],
+    deps: Dict[Api, Any],
 ):
     from .scoring import LlmAsJudgeScoringImpl

View file

@@ -4,12 +4,14 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from typing import Any, Dict
+
 from .config import CodeInterpreterToolConfig

 __all__ = ["CodeInterpreterToolConfig", "CodeInterpreterToolRuntimeImpl"]


-async def get_provider_impl(config: CodeInterpreterToolConfig, _deps):
+async def get_provider_impl(config: CodeInterpreterToolConfig, _deps: Dict[str, Any]):
     from .code_interpreter import CodeInterpreterToolRuntimeImpl

     impl = CodeInterpreterToolRuntimeImpl(config)

View file

@@ -4,14 +4,14 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Dict
+from typing import Any, Dict

-from llama_stack.providers.datatypes import Api, ProviderSpec
+from llama_stack.providers.datatypes import Api

 from .config import ChromaVectorIOConfig


-async def get_provider_impl(config: ChromaVectorIOConfig, deps: Dict[Api, ProviderSpec]):
+async def get_provider_impl(config: ChromaVectorIOConfig, deps: Dict[Api, Any]):
     from llama_stack.providers.remote.vector_io.chroma.chroma import (
         ChromaVectorIOAdapter,
     )

View file

@@ -4,14 +4,14 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Dict
+from typing import Any, Dict

-from llama_stack.providers.datatypes import Api, ProviderSpec
+from llama_stack.providers.datatypes import Api

 from .config import FaissVectorIOConfig


-async def get_provider_impl(config: FaissVectorIOConfig, deps: Dict[Api, ProviderSpec]):
+async def get_provider_impl(config: FaissVectorIOConfig, deps: Dict[Api, Any]):
     from .faiss import FaissVectorIOAdapter

     assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}"

View file

@@ -4,14 +4,14 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Dict
+from typing import Any, Dict

-from llama_stack.providers.datatypes import Api, ProviderSpec
+from llama_stack.providers.datatypes import Api

 from .config import MilvusVectorIOConfig


-async def get_provider_impl(config: MilvusVectorIOConfig, deps: Dict[Api, ProviderSpec]):
+async def get_provider_impl(config: MilvusVectorIOConfig, deps: Dict[Api, Any]):
     from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusVectorIOAdapter

     impl = MilvusVectorIOAdapter(config, deps[Api.inference])

View file

@@ -4,14 +4,14 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Dict
+from typing import Any, Dict

-from llama_stack.providers.datatypes import Api, ProviderSpec
+from llama_stack.providers.datatypes import Api

 from .config import SQLiteVectorIOConfig


-async def get_provider_impl(config: SQLiteVectorIOConfig, deps: Dict[Api, ProviderSpec]):
+async def get_provider_impl(config: SQLiteVectorIOConfig, deps: Dict[Api, Any]):
     from .sqlite_vec import SQLiteVecVectorIOAdapter

     assert isinstance(config, SQLiteVectorIOConfig), f"Unexpected config type: {type(config)}"

View file

@@ -24,10 +24,6 @@ MODEL_ENTRIES = [
         "accounts/fireworks/models/llama-v3p1-405b-instruct",
         CoreModelId.llama3_1_405b_instruct.value,
     ),
-    build_hf_repo_model_entry(
-        "accounts/fireworks/models/llama-v3p2-1b-instruct",
-        CoreModelId.llama3_2_1b_instruct.value,
-    ),
     build_hf_repo_model_entry(
         "accounts/fireworks/models/llama-v3p2-3b-instruct",
         CoreModelId.llama3_2_3b_instruct.value,

View file

@@ -128,16 +128,6 @@ models:
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
-- metadata: {}
-  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
-  provider_id: fireworks
-  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-3.2-1B-Instruct
-  provider_id: fireworks
-  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
-  model_type: llm
 - metadata: {}
   model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   provider_id: fireworks

View file

@@ -186,16 +186,6 @@ models:
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
-- metadata: {}
-  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
-  provider_id: fireworks
-  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-3.2-1B-Instruct
-  provider_id: fireworks
-  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
-  model_type: llm
 - metadata: {}
   model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   provider_id: fireworks

View file

@@ -140,16 +140,6 @@ models:
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
-- metadata: {}
-  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
-  provider_id: fireworks
-  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-3.2-1B-Instruct
-  provider_id: fireworks
-  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
-  model_type: llm
 - metadata: {}
   model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   provider_id: fireworks

View file

@@ -134,16 +134,6 @@ models:
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
-- metadata: {}
-  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
-  provider_id: fireworks
-  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
-  model_type: llm
-- metadata: {}
-  model_id: meta-llama/Llama-3.2-1B-Instruct
-  provider_id: fireworks
-  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
-  model_type: llm
 - metadata: {}
   model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   provider_id: fireworks