Support for Llama3.2 models and Swift SDK (#98)

Ashwin Bharambe 2024-09-25 10:29:58 -07:00, committed by GitHub
commit 56aed59eb4 (parent 95abbf576b)
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)
56 changed files with 3745 additions and 630 deletions


@ -37,50 +37,74 @@ llama model list
You should see a table like this:
<pre style="font-family: monospace;">
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Model Descriptor | HuggingFace Repo | Context Length | Hardware Requirements |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-8B | meta-llama/Meta-Llama-3.1-8B | 128K | 1 GPU, each >= 20GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-70B | meta-llama/Meta-Llama-3.1-70B | 128K | 8 GPUs, each >= 20GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-405B:bf16-mp8 | | 128K | 8 GPUs, each >= 120GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-405B | meta-llama/Meta-Llama-3.1-405B-FP8 | 128K | 8 GPUs, each >= 70GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-405B:bf16-mp16 | meta-llama/Meta-Llama-3.1-405B | 128K | 16 GPUs, each >= 70GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-8B-Instruct | meta-llama/Meta-Llama-3.1-8B-Instruct | 128K | 1 GPU, each >= 20GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-70B-Instruct | meta-llama/Meta-Llama-3.1-70B-Instruct | 128K | 8 GPUs, each >= 20GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-405B-Instruct:bf16-mp8 | | 128K | 8 GPUs, each >= 120GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-405B-Instruct | meta-llama/Meta-Llama-3.1-405B-Instruct-FP8 | 128K | 8 GPUs, each >= 70GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-405B-Instruct:bf16-mp16 | meta-llama/Meta-Llama-3.1-405B-Instruct | 128K | 16 GPUs, each >= 70GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Llama-Guard-3-8B | meta-llama/Llama-Guard-3-8B | 128K | 1 GPU, each >= 20GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Llama-Guard-3-8B:int8-mp1 | meta-llama/Llama-Guard-3-8B-INT8 | 128K | 1 GPU, each >= 10GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Prompt-Guard-86M | meta-llama/Prompt-Guard-86M | 128K | 1 GPU, each >= 1GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
+----------------------------------+------------------------------------------+----------------+
| Model Descriptor | HuggingFace Repo | Context Length |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-8B | meta-llama/Llama-3.1-8B | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-70B | meta-llama/Llama-3.1-70B | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B:bf16-mp8 | meta-llama/Llama-3.1-405B | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B | meta-llama/Llama-3.1-405B-FP8 | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B:bf16-mp16 | meta-llama/Llama-3.1-405B | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-8B-Instruct | meta-llama/Llama-3.1-8B-Instruct | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-70B-Instruct | meta-llama/Llama-3.1-70B-Instruct | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B-Instruct:bf16-mp8 | meta-llama/Llama-3.1-405B-Instruct | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B-Instruct | meta-llama/Llama-3.1-405B-Instruct-FP8 | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B-Instruct:bf16-mp16 | meta-llama/Llama-3.1-405B-Instruct | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-1B | meta-llama/Llama-3.2-1B | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-3B | meta-llama/Llama-3.2-3B | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-11B-Vision | meta-llama/Llama-3.2-11B-Vision | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-90B-Vision | meta-llama/Llama-3.2-90B-Vision | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-1B-Instruct | meta-llama/Llama-3.2-1B-Instruct | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-3B-Instruct | meta-llama/Llama-3.2-3B-Instruct | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-11B-Vision-Instruct | meta-llama/Llama-3.2-11B-Vision-Instruct | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-90B-Vision-Instruct | meta-llama/Llama-3.2-90B-Vision-Instruct | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama-Guard-3-11B-Vision | meta-llama/Llama-Guard-3-11B-Vision | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama-Guard-3-1B:int4-mp1 | meta-llama/Llama-Guard-3-1B-INT4 | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama-Guard-3-1B | meta-llama/Llama-Guard-3-1B | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama-Guard-3-8B | meta-llama/Llama-Guard-3-8B | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama-Guard-3-8B:int8-mp1 | meta-llama/Llama-Guard-3-8B-INT8 | 128K |
+----------------------------------+------------------------------------------+----------------+
| Prompt-Guard-86M | meta-llama/Prompt-Guard-86M | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama-Guard-2-8B | meta-llama/Llama-Guard-2-8B | 4K |
+----------------------------------+------------------------------------------+----------------+
</pre>
To download models, you can use the `llama download` command.
#### Downloading from [Meta](https://llama.meta.com/llama-downloads/)
Here is an example download command to get the 8B/70B Instruct models. You will need a `META_URL`, which can be obtained from [here](https://llama.meta.com/docs/getting_the_models/meta/)
Here is an example download command to get the 3B-Instruct/11B-Vision-Instruct models. You will need a `META_URL`, which can be obtained from [here](https://llama.meta.com/docs/getting_the_models/meta/)
Download the required checkpoints using the following commands:
```bash
# Download the 8B model; it can be run on a single GPU
llama download --source meta --model-id Meta-Llama3.1-8B-Instruct --meta-url META_URL
llama download --source meta --model-id Llama3.2-3B-Instruct --meta-url META_URL
# You can also get the 70B model; this will require 8 GPUs, however
llama download --source meta --model-id Meta-Llama3.1-70B-Instruct --meta-url META_URL
llama download --source meta --model-id Llama3.2-11B-Vision-Instruct --meta-url META_URL
# llama-agents has safety enabled by default. For this, you will need
# safety models -- Llama-Guard and Prompt-Guard
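# As an illustrative sketch (model IDs taken from the table above; the flags
# mirror the commands shown earlier), the guard models can be fetched the
# same way:
llama download --source meta --model-id Llama-Guard-3-1B --meta-url META_URL
llama download --source meta --model-id Prompt-Guard-86M --meta-url META_URL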
@ -124,7 +148,7 @@ The `llama model` command helps you explore the models interface.
### 2.1 Subcommands
1. `download`: Download the model from different sources. (meta, huggingface)
2. `list`: Lists all the models available for download, along with the hardware requirements for deploying them.
3. `template`: <TODO: What is a template?>
3. `prompt-format`: Shows llama model message formats.
4. `describe`: Describes all the properties of the model.
### 2.2 Sample Usage
@ -135,7 +159,7 @@ The `llama model` command helps you explore the models interface.
llama model --help
```
<pre style="font-family: monospace;">
usage: llama model [-h] {download,list,template,describe} ...
usage: llama model [-h] {download,list,prompt-format,describe} ...
Work with llama models
@ -143,124 +167,67 @@ options:
-h, --help show this help message and exit
model_subcommands:
{download,list,template,describe}
{download,list,prompt-format,describe}
</pre>
You can use the `describe` command to learn more about a model:
```
llama model describe -m Meta-Llama3.1-8B-Instruct
llama model describe -m Llama3.2-3B-Instruct
```
### 2.3 Describe
<pre style="font-family: monospace;">
+-----------------------------+---------------------------------------+
| Model | Meta- |
| | Llama3.1-8B-Instruct |
+-----------------------------+---------------------------------------+
| HuggingFace ID | meta-llama/Meta-Llama-3.1-8B-Instruct |
+-----------------------------+---------------------------------------+
| Description | Llama 3.1 8b instruct model |
+-----------------------------+---------------------------------------+
| Context Length | 128K tokens |
+-----------------------------+---------------------------------------+
| Weights format | bf16 |
+-----------------------------+---------------------------------------+
| Model params.json | { |
| | "dim": 4096, |
| | "n_layers": 32, |
| | "n_heads": 32, |
| | "n_kv_heads": 8, |
| | "vocab_size": 128256, |
| | "ffn_dim_multiplier": 1.3, |
| | "multiple_of": 1024, |
| | "norm_eps": 1e-05, |
| | "rope_theta": 500000.0, |
| | "use_scaled_rope": true |
| | } |
+-----------------------------+---------------------------------------+
| Recommended sampling params | { |
| | "strategy": "top_p", |
| | "temperature": 1.0, |
| | "top_p": 0.9, |
| | "top_k": 0 |
| | } |
+-----------------------------+---------------------------------------+
+-----------------------------+----------------------------------+
| Model | Llama3.2-3B-Instruct |
+-----------------------------+----------------------------------+
| HuggingFace ID | meta-llama/Llama-3.2-3B-Instruct |
+-----------------------------+----------------------------------+
| Description | Llama 3.2 3b instruct model |
+-----------------------------+----------------------------------+
| Context Length | 128K tokens |
+-----------------------------+----------------------------------+
| Weights format | bf16 |
+-----------------------------+----------------------------------+
| Model params.json | { |
| | "dim": 3072, |
| | "n_layers": 28, |
| | "n_heads": 24, |
| | "n_kv_heads": 8, |
| | "vocab_size": 128256, |
| | "ffn_dim_multiplier": 1.0, |
| | "multiple_of": 256, |
| | "norm_eps": 1e-05, |
| | "rope_theta": 500000.0, |
| | "use_scaled_rope": true |
| | } |
+-----------------------------+----------------------------------+
| Recommended sampling params | { |
| | "strategy": "top_p", |
| | "temperature": 1.0, |
| | "top_p": 0.9, |
| | "top_k": 0 |
| | } |
+-----------------------------+----------------------------------+
</pre>
### 2.4 Template
You can even run `llama model template` to see all of the templates and their tokens:
### 2.4 Prompt Format
You can even run `llama model prompt-format` to see all of the templates and their tokens:
```
llama model template
llama model prompt-format -m Llama3.2-3B-Instruct
```
<p align="center">
<img width="719" alt="image" src="https://github.com/user-attachments/assets/c5332026-8c0b-4edc-b438-ec60cd7ca554">
</p>
<pre style="font-family: monospace;">
+-----------+---------------------------------+
| Role | Template Name |
+-----------+---------------------------------+
| user | user-default |
| assistant | assistant-builtin-tool-call |
| assistant | assistant-custom-tool-call |
| assistant | assistant-default |
| system | system-builtin-and-custom-tools |
| system | system-builtin-tools-only |
| system | system-custom-tools-only |
| system | system-default |
| tool | tool-success |
| tool | tool-failure |
+-----------+---------------------------------+
</pre>
And fetch an example by passing its name to `--name`:
```
llama model template --name tool-success
```
<pre style="font-family: monospace;">
+----------+----------------------------------------------------------------+
| Name | tool-success |
+----------+----------------------------------------------------------------+
| Template | <|start_header_id|>ipython<|end_header_id|> |
| | |
| | completed |
| | [stdout]{"results":["something |
| | something"]}[/stdout]<|eot_id|> |
| | |
+----------+----------------------------------------------------------------+
| Notes | Note ipython header and [stdout] |
+----------+----------------------------------------------------------------+
</pre>
Or:
```
llama model template --name system-builtin-tools-only
```
<pre style="font-family: monospace;">
+----------+--------------------------------------------+
| Name | system-builtin-tools-only |
+----------+--------------------------------------------+
| Template | <|start_header_id|>system<|end_header_id|> |
| | |
| | Environment: ipython |
| | Tools: brave_search, wolfram_alpha |
| | |
| | Cutting Knowledge Date: December 2023 |
| | Today Date: 21 August 2024 |
| | <|eot_id|> |
| | |
+----------+--------------------------------------------+
| Notes | |
+----------+--------------------------------------------+
</pre>
These commands can help you understand the model interface and how prompts / messages are formatted for various scenarios.
You will be shown a Markdown-formatted description of the model interface and how prompts / messages are formatted for various scenarios.
**NOTE**: Outputs in the terminal are color-printed to highlight special tokens.
## Step 3: Building and Configuring Llama Stack Distributions
- Please see our [Getting Started](getting_started.md) guide for details.
- Please see our [Getting Started](getting_started.md) guide for more details on how to build and start a Llama Stack distribution.
### Step 3.1 Build
In the following steps, imagine we'll be working with a `Meta-Llama3.1-8B-Instruct` model. We will name our build `8b-instruct` to help us remember the config. We will then build our distribution (in the form of a Conda environment or Docker image). In this step, we will specify:

BIN docs/dog.jpg (new binary file, 39 KiB; not shown)

docs/getting_started.ipynb (new file, 325 lines; diff suppressed because one or more lines are too long)


@ -1,9 +1,70 @@
# llama-stack
[![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-stack)](https://pypi.org/project/llama-stack/)
[![Discord](https://img.shields.io/discord/1257833999603335178)](https://discord.gg/TZAAYNVtrU)
This repository contains the specifications and implementations of the APIs which are part of the Llama Stack.
The Llama Stack defines and standardizes the building blocks needed to bring generative AI applications to market. These blocks span the entire development lifecycle: from model training and fine-tuning, through product evaluation, to invoking AI agents in production. Beyond definition, we're developing open-source versions and partnering with cloud providers, ensuring developers can assemble AI solutions using consistent, interlocking pieces across platforms. The ultimate goal is to accelerate innovation in the AI space.
The Stack APIs are rapidly improving but still very much a work in progress, and we invite feedback as well as direct contributions.
## APIs
The Llama Stack consists of the following set of APIs:
- Inference
- Safety
- Memory
- Agentic System
- Evaluation
- Post Training
- Synthetic Data Generation
- Reward Scoring
Each API is itself a collection of REST endpoints.
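Since each API is a set of REST endpoints, you can exercise a running distribution with plain HTTP. Below is a rough sketch of what a call could look like; the endpoint path, port, and payload shape are assumptions based on the draft spec, not a guaranteed interface.
```bash
# Hypothetical chat-completion request against a locally running distribution;
# adjust the path and body to match the actual specification.
curl -X POST http://localhost:5000/inference/chat_completion \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "Llama3.2-3B-Instruct",
    "messages": [{"role": "user", "content": "Hello!"}]
  }'
```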
## API Providers
A Provider is what makes the API real -- it supplies the actual implementation backing the API.
As an example, for Inference, the implementation could be backed by open-source options like `[ torch | vLLM | TensorRT ]`.
A provider can also be just a pointer to a remote REST service -- for example, cloud providers or dedicated inference providers could serve these APIs.
## Llama Stack Distribution
A Distribution is where APIs and Providers are assembled together to provide a consistent whole to the end application developer. You can mix-and-match providers -- some could be backed by local code and some could be remote. As a hobbyist, you can serve a small model locally, but choose a cloud provider for a large model. Regardless, the higher-level APIs your app needs to work with don't need to change at all. You can even imagine moving across the server / mobile-device boundary as well, always using the same uniform set of APIs for developing Generative AI applications.
## Installation
You can install this repository as a [package](https://pypi.org/project/llama-stack/) with `pip install llama-stack`.
If you want to install from source:
```bash
mkdir -p ~/local
cd ~/local
git clone git@github.com:meta-llama/llama-stack.git
conda create -n stack python=3.10
conda activate stack
cd llama-stack
$CONDA_PREFIX/bin/pip install -e .
```
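Once installed (either way), a quick sanity check is to invoke the CLI:
```bash
# Verify the `llama` CLI is on your path and can enumerate models
llama --help
llama model list
```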
# Getting Started
The `llama` CLI tool helps you set up and use the Llama toolchain & agentic systems. It should be available on your path after installing the `llama-stack` package.
This guide lets you quickly get started building and running a Llama Stack server in under 5 minutes!
You may also check out this [notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb) to try out our demo scripts.
## Quick Cheatsheet
- A quick three-command flow to build and start a Llama Stack server using our Meta Reference implementation for all API endpoints, with `conda` as the build type (outlined below).
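In outline, the flow looks like this (the name and port are placeholders; each step is interactive and is walked through in detail below):
```bash
llama stack build                       # define a build, e.g. my-local-stack
llama stack configure my-local-stack    # fill in provider configuration
llama stack run my-local-stack --port 5000
```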
@ -12,7 +73,7 @@ This guides allows you to quickly get started with building and running a Llama
```
llama stack build
> Enter an unique name for identifying your Llama Stack build distribution (e.g. my-local-stack): my-local-llama-stack
> Enter an unique name for identifying your Llama Stack build distribution (e.g. my-local-stack): my-local-stack
> Enter the image type you want your distribution to be built with (docker or conda): conda
Llama Stack is composed of several APIs working together. Let's configure the providers (implementations) you want to use for these APIs.
@ -24,47 +85,57 @@ llama stack build
> (Optional) Enter a short description for your Llama Stack distribution:
Build spec configuration saved at ~/.conda/envs/llamastack-my-local-llama-stack/my-local-llama-stack-build.yaml
Build spec configuration saved at ~/.conda/envs/llamastack-my-local-stack/my-local-stack-build.yaml
You can now run `llama stack configure my-local-stack`
```
**`llama stack configure`**
- Run `llama stack configure <name>` with the name you previously defined in the `build` step.
```
llama stack configure my-local-llama-stack
llama stack configure <name>
```
- You will be prompted to enter configurations for your Llama Stack
Configuring APIs to serve...
Enter comma-separated list of APIs to serve:
```
$ llama stack configure my-local-stack
Could not find my-local-stack. Trying conda build name instead...
Configuring API `inference`...
Configuring provider `meta-reference`...
Enter value for model (default: Meta-Llama3.1-8B-Instruct) (required):
=== Configuring provider `meta-reference` for API inference...
Enter value for model (default: Llama3.1-8B-Instruct) (required):
Do you want to configure quantization? (y/n): n
Enter value for torch_seed (optional):
Enter value for max_seq_len (required): 4096
Enter value for max_seq_len (default: 4096) (required):
Enter value for max_batch_size (default: 1) (required):
Configuring API `safety`...
Configuring provider `meta-reference`...
Configuring API `safety`...
=== Configuring provider `meta-reference` for API safety...
Do you want to configure llama_guard_shield? (y/n): n
Do you want to configure prompt_guard_shield? (y/n): n
Configuring API `agents`...
=== Configuring provider `meta-reference` for API agents...
Enter `type` for persistence_store (options: redis, sqlite, postgres) (default: sqlite):
Configuring SqliteKVStoreConfig:
Enter value for namespace (optional):
Enter value for db_path (default: /home/xiyan/.llama/runtime/kvstore.db) (required):
Configuring provider `meta-reference`...
Configuring API `memory`...
=== Configuring provider `meta-reference` for API memory...
> Please enter the supported memory bank type your provider has for memory: vector
Configuring provider `meta-reference`...
Configuring API `telemetry`...
=== Configuring provider `meta-reference` for API telemetry...
Configuring provider `meta-reference`...
> YAML configuration has been written to ~/.llama/builds/conda/my-local-llama-stack-run.yaml.
You can now run `llama stack run my-local-llama-stack --port PORT` or `llama stack run ~/.llama/builds/conda/my-local-llama-stack-run.yaml --port PORT`
> YAML configuration has been written to ~/.llama/builds/conda/my-local-stack-run.yaml.
You can now run `llama stack run my-local-stack --port PORT`
```
**`llama stack run`**
- Run `llama stack run <name>` with the name you have previously defined.
```
llama stack run my-local-llama-stack
llama stack run my-local-stack
...
> initializing model parallel with size 1
@ -126,7 +197,7 @@ llama stack build
Running the command above will let you fill in the configuration to build your Llama Stack distribution; you will see output like the following.
```
> Enter an unique name for identifying your Llama Stack build distribution (e.g. my-local-stack): my-local-llama-stack
> Enter an unique name for identifying your Llama Stack build distribution (e.g. my-local-stack): 8b-instruct
> Enter the image type you want your distribution to be built with (docker or conda): conda
Llama Stack is composed of several APIs working together. Let's configure the providers (implementations) you want to use for these APIs.
@ -138,9 +209,14 @@ Running the command above will allow you to fill in the configuration to build y
> (Optional) Enter a short description for your Llama Stack distribution:
Build spec configuration saved at ~/.conda/envs/llamastack-my-local-llama-stack/my-local-llama-stack-build.yaml
Build spec configuration saved at ~/.conda/envs/llamastack-my-local-llama-stack/8b-instruct-build.yaml
```
**Ollama (optional)**
If you plan to use Ollama for inference, you'll need to install the server [via these instructions](https://ollama.com/download).
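As a sketch (the model tag is an assumption; check Ollama's model library for the exact tags), you can pre-pull a Llama model once the Ollama server is installed:
```bash
# Pre-pull a Llama model for Ollama-backed inference (tag assumed)
ollama pull llama3.1:8b
```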
#### Building from templates
- To build with alternative API providers, we provide distribution templates to help you get started with a distribution backed by different providers.
@ -236,7 +312,7 @@ llama stack configure [ <name> | <docker-image-name> | <path/to/name.build.yaml>
- Run `docker images` to check the list of available images on your machine.
```
$ llama stack configure ~/.llama/distributions/conda/8b-instruct-build.yaml
$ llama stack configure 8b-instruct
Configuring API: inference (meta-reference)
Enter value for model (existing: Meta-Llama3.1-8B-Instruct) (required):
@ -284,13 +360,13 @@ Note that all configurations as well as models are stored in `~/.llama`
Now, let's start the Llama Stack Distribution Server. You will need the YAML configuration file that was written out at the end of the `llama stack configure` step.
```
llama stack run ~/.llama/builds/conda/8b-instruct-run.yaml
llama stack run 8b-instruct
```
You should see the Llama Stack server start and print the APIs it supports:
```
$ llama stack run ~/.llama/builds/local/conda/8b-instruct.yaml
$ llama stack run 8b-instruct
> initializing model parallel with size 1
> initializing ddp with size 1
@ -357,4 +433,4 @@ Similarly you can test safety (if you configured llama-guard and/or prompt-guard
python -m llama_stack.apis.safety.client localhost 5000
```
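For completeness, a matching smoke test for inference could look like this; the module path mirrors the safety client above and is an assumption, as is the port:
```bash
python -m llama_stack.apis.inference.client localhost 5000
```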
You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/sdk_examples) repo.
You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps) repo.


@ -21,7 +21,7 @@
"info": {
"title": "[DRAFT] Llama Stack Specification",
"version": "0.0.1",
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-23 10:56:42.866760"
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-23 16:58:41.469308"
},
"servers": [
{
@ -2027,10 +2027,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -2053,6 +2063,35 @@
"tool_calls"
]
},
"ImageMedia": {
"type": "object",
"properties": {
"image": {
"oneOf": [
{
"type": "object",
"properties": {
"format": {
"type": "string"
},
"format_description": {
"type": "string"
}
},
"additionalProperties": false,
"title": "This class represents an image object. To create"
},
{
"$ref": "#/components/schemas/URL"
}
]
}
},
"additionalProperties": false,
"required": [
"image"
]
},
"SamplingParams": {
"type": "object",
"properties": {
@ -2115,10 +2154,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -2267,6 +2316,28 @@
"required": {
"type": "boolean",
"default": true
},
"default": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"additionalProperties": false,
@ -2278,7 +2349,8 @@
"type": "string",
"enum": [
"json",
"function_tag"
"function_tag",
"python_list"
],
"title": "This Enum refers to the prompt format for calling custom / zero shot tools",
"description": "`json` --\n Refers to the json format for calling tools.\n The json format takes the form like\n {\n \"type\": \"function\",\n \"function\" : {\n \"name\": \"function_name\",\n \"description\": \"function_description\",\n \"parameters\": {...}\n }\n }\n\n`function_tag` --\n This is an example of how you could define\n your own user defined format for making tool calls.\n The function_tag format looks like this,\n <function=function_name>(parameters)</function>\n\nThe detailed prompts for each of these formats are added to llama cli"
@ -2309,10 +2381,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -2326,6 +2408,11 @@
"content"
]
},
"URL": {
"type": "string",
"format": "uri",
"pattern": "^(https?://|file://|data:)"
},
"UserMessage": {
"type": "object",
"properties": {
@ -2339,10 +2426,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -2352,10 +2449,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -2455,10 +2562,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -2714,10 +2831,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -3298,11 +3425,6 @@
"engine"
]
},
"URL": {
"type": "string",
"format": "uri",
"pattern": "^(https?://|file://|data:)"
},
"WolframAlphaToolDefinition": {
"type": "object",
"properties": {
@ -3396,10 +3518,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
},
{
@ -3731,10 +3863,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -3888,10 +4030,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -4316,10 +4468,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -4515,10 +4677,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
},
{
@ -5407,10 +5579,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -5460,10 +5642,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -6027,32 +6219,32 @@
}
],
"tags": [
{
"name": "Inference"
},
{
"name": "Shields"
},
{
"name": "Models"
},
{
"name": "MemoryBanks"
},
{
"name": "SyntheticDataGeneration"
"name": "BatchInference"
},
{
"name": "RewardScoring"
},
{
"name": "PostTraining"
"name": "SyntheticDataGeneration"
},
{
"name": "Agents"
},
{
"name": "MemoryBanks"
},
{
"name": "Safety"
},
{
"name": "Evaluations"
"name": "Models"
},
{
"name": "Inference"
},
{
"name": "Memory"
@ -6061,14 +6253,14 @@
"name": "Telemetry"
},
{
"name": "Agents"
},
{
"name": "BatchInference"
"name": "PostTraining"
},
{
"name": "Datasets"
},
{
"name": "Evaluations"
},
{
"name": "BuiltinTool",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/BuiltinTool\" />"
@ -6077,6 +6269,10 @@
"name": "CompletionMessage",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/CompletionMessage\" />"
},
{
"name": "ImageMedia",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/ImageMedia\" />"
},
{
"name": "SamplingParams",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/SamplingParams\" />"
@ -6117,6 +6313,10 @@
"name": "ToolResponseMessage",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/ToolResponseMessage\" />"
},
{
"name": "URL",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/URL\" />"
},
{
"name": "UserMessage",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/UserMessage\" />"
@ -6221,10 +6421,6 @@
"name": "SearchToolDefinition",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/SearchToolDefinition\" />"
},
{
"name": "URL",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/URL\" />"
},
{
"name": "WolframAlphaToolDefinition",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/WolframAlphaToolDefinition\" />"
@ -6661,6 +6857,7 @@
"FunctionCallToolDefinition",
"GetAgentsSessionRequest",
"GetDocumentsRequest",
"ImageMedia",
"InferenceStep",
"InsertDocumentsRequest",
"LogEventRequest",


@ -210,8 +210,11 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
- $ref: '#/components/schemas/URL'
mime_type:
@ -273,8 +276,11 @@ components:
items:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
type: array
logprobs:
@ -441,8 +447,11 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
role:
const: assistant
@ -466,8 +475,11 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
logprobs:
additionalProperties: false
@ -742,8 +754,11 @@ components:
items:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
type: array
model:
@ -893,6 +908,23 @@ components:
required:
- document_ids
type: object
ImageMedia:
additionalProperties: false
properties:
image:
oneOf:
- additionalProperties: false
properties:
format:
type: string
format_description:
type: string
title: This class represents an image object. To create
type: object
- $ref: '#/components/schemas/URL'
required:
- image
type: object
InferenceStep:
additionalProperties: false
properties:
@ -1041,8 +1073,11 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
- $ref: '#/components/schemas/URL'
document_id:
@ -1108,8 +1143,11 @@ components:
inserted_context:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
memory_bank_ids:
items:
@ -1545,8 +1583,11 @@ components:
query:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
required:
- bank_id
@ -1562,8 +1603,11 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
document_id:
type: string
@ -2067,8 +2111,11 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
role:
const: system
@ -2203,6 +2250,14 @@ components:
ToolParamDefinition:
additionalProperties: false
properties:
default:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description:
type: string
param_type:
@ -2225,6 +2280,7 @@ components:
enum:
- json
- function_tag
- python_list
title: This Enum refers to the prompt format for calling custom / zero shot
tools
type: string
@ -2236,8 +2292,11 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
tool_name:
oneOf:
@ -2256,8 +2315,11 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
role:
const: ipython
@ -2451,14 +2513,20 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
context:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
role:
const: user
@ -2501,7 +2569,7 @@ info:
description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\
\ draft and subject to change.\n Generated at 2024-09-23 10:56:42.866760"
\ draft and subject to change.\n Generated at 2024-09-23 16:58:41.469308"
title: '[DRAFT] Llama Stack Specification'
version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@ -3739,25 +3807,27 @@ security:
servers:
- url: http://any-hosted-llama-stack.com
tags:
- name: Inference
- name: Shields
- name: Models
- name: MemoryBanks
- name: SyntheticDataGeneration
- name: BatchInference
- name: RewardScoring
- name: PostTraining
- name: SyntheticDataGeneration
- name: Agents
- name: MemoryBanks
- name: Safety
- name: Evaluations
- name: Models
- name: Inference
- name: Memory
- name: Telemetry
- name: Agents
- name: BatchInference
- name: PostTraining
- name: Datasets
- name: Evaluations
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
name: BuiltinTool
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
/>
name: CompletionMessage
- description: <SchemaDefinition schemaRef="#/components/schemas/ImageMedia" />
name: ImageMedia
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingParams" />
name: SamplingParams
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingStrategy"
@ -3790,6 +3860,8 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolResponseMessage"
/>
name: ToolResponseMessage
- description: <SchemaDefinition schemaRef="#/components/schemas/URL" />
name: URL
- description: <SchemaDefinition schemaRef="#/components/schemas/UserMessage" />
name: UserMessage
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
@ -3876,8 +3948,6 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/SearchToolDefinition"
/>
name: SearchToolDefinition
- description: <SchemaDefinition schemaRef="#/components/schemas/URL" />
name: URL
- description: <SchemaDefinition schemaRef="#/components/schemas/WolframAlphaToolDefinition"
/>
name: WolframAlphaToolDefinition
@ -4233,6 +4303,7 @@ x-tagGroups:
- FunctionCallToolDefinition
- GetAgentsSessionRequest
- GetDocumentsRequest
- ImageMedia
- InferenceStep
- InsertDocumentsRequest
- LogEventRequest