Support for Llama3.2 models and Swift SDK (#98)

Ashwin Bharambe 2024-09-25 10:29:58 -07:00, committed by GitHub
commit 56aed59eb4 (parent 95abbf576b)
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)
56 changed files with 3745 additions and 630 deletions


@ -37,50 +37,74 @@ llama model list
You should see a table like this:
<pre style="font-family: monospace;">
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Model Descriptor | HuggingFace Repo | Context Length | Hardware Requirements |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-8B | meta-llama/Meta-Llama-3.1-8B | 128K | 1 GPU, each >= 20GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-70B | meta-llama/Meta-Llama-3.1-70B | 128K | 8 GPUs, each >= 20GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-405B:bf16-mp8 | | 128K | 8 GPUs, each >= 120GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-405B | meta-llama/Meta-Llama-3.1-405B-FP8 | 128K | 8 GPUs, each >= 70GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-405B:bf16-mp16 | meta-llama/Meta-Llama-3.1-405B | 128K | 16 GPUs, each >= 70GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-8B-Instruct | meta-llama/Meta-Llama-3.1-8B-Instruct | 128K | 1 GPU, each >= 20GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-70B-Instruct | meta-llama/Meta-Llama-3.1-70B-Instruct | 128K | 8 GPUs, each >= 20GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-405B-Instruct:bf16-mp8 | | 128K | 8 GPUs, each >= 120GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-405B-Instruct | meta-llama/Meta-Llama-3.1-405B-Instruct-FP8 | 128K | 8 GPUs, each >= 70GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-405B-Instruct:bf16-mp16 | meta-llama/Meta-Llama-3.1-405B-Instruct | 128K | 16 GPUs, each >= 70GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Llama-Guard-3-8B | meta-llama/Llama-Guard-3-8B | 128K | 1 GPU, each >= 20GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Llama-Guard-3-8B:int8-mp1 | meta-llama/Llama-Guard-3-8B-INT8 | 128K | 1 GPU, each >= 10GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Prompt-Guard-86M | meta-llama/Prompt-Guard-86M | 128K | 1 GPU, each >= 1GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
+----------------------------------+------------------------------------------+----------------+
| Model Descriptor | HuggingFace Repo | Context Length |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-8B | meta-llama/Llama-3.1-8B | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-70B | meta-llama/Llama-3.1-70B | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B:bf16-mp8 | meta-llama/Llama-3.1-405B | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B | meta-llama/Llama-3.1-405B-FP8 | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B:bf16-mp16 | meta-llama/Llama-3.1-405B | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-8B-Instruct | meta-llama/Llama-3.1-8B-Instruct | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-70B-Instruct | meta-llama/Llama-3.1-70B-Instruct | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B-Instruct:bf16-mp8 | meta-llama/Llama-3.1-405B-Instruct | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B-Instruct | meta-llama/Llama-3.1-405B-Instruct-FP8 | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B-Instruct:bf16-mp16 | meta-llama/Llama-3.1-405B-Instruct | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-1B | meta-llama/Llama-3.2-1B | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-3B | meta-llama/Llama-3.2-3B | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-11B-Vision | meta-llama/Llama-3.2-11B-Vision | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-90B-Vision | meta-llama/Llama-3.2-90B-Vision | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-1B-Instruct | meta-llama/Llama-3.2-1B-Instruct | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-3B-Instruct | meta-llama/Llama-3.2-3B-Instruct | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-11B-Vision-Instruct | meta-llama/Llama-3.2-11B-Vision-Instruct | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-90B-Vision-Instruct | meta-llama/Llama-3.2-90B-Vision-Instruct | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama-Guard-3-11B-Vision | meta-llama/Llama-Guard-3-11B-Vision | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama-Guard-3-1B:int4-mp1 | meta-llama/Llama-Guard-3-1B-INT4 | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama-Guard-3-1B | meta-llama/Llama-Guard-3-1B | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama-Guard-3-8B | meta-llama/Llama-Guard-3-8B | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama-Guard-3-8B:int8-mp1 | meta-llama/Llama-Guard-3-8B-INT8 | 128K |
+----------------------------------+------------------------------------------+----------------+
| Prompt-Guard-86M | meta-llama/Prompt-Guard-86M | 128K |
+----------------------------------+------------------------------------------+----------------+
| Llama-Guard-2-8B | meta-llama/Llama-Guard-2-8B | 4K |
+----------------------------------+------------------------------------------+----------------+
</pre>
To download models, you can use the `llama download` command.
#### Downloading from [Meta](https://llama.meta.com/llama-downloads/)
Here is an example download command to get the 8B/70B Instruct models. You will need a `META_URL`, which can be obtained from [here](https://llama.meta.com/docs/getting_the_models/meta/)
Here is an example download command to get the 3B-Instruct/11B-Vision-Instruct models. You will need a `META_URL`, which can be obtained from [here](https://llama.meta.com/docs/getting_the_models/meta/)
Download the required checkpoints using the following commands:
```bash
# Download the 8B model; it can be run on a single GPU
llama download --source meta --model-id Meta-Llama3.1-8B-Instruct --meta-url META_URL
llama download --source meta --model-id Llama3.2-3B-Instruct --meta-url META_URL
# You can also get the 70B model; this will require 8 GPUs, however
llama download --source meta --model-id Meta-Llama3.1-70B-Instruct --meta-url META_URL
llama download --source meta --model-id Llama3.2-11B-Vision-Instruct --meta-url META_URL
# llama-agents has safety enabled by default. For this, you will need
# safety models -- Llama-Guard and Prompt-Guard
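# As an illustrative sketch (model IDs taken from the table above; the flags
# mirror the commands shown earlier), the guard models can be fetched the
# same way:
llama download --source meta --model-id Llama-Guard-3-1B --meta-url META_URL
llama download --source meta --model-id Prompt-Guard-86M --meta-url META_URL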
@ -124,7 +148,7 @@ The `llama model` command helps you explore the models interface.
### 2.1 Subcommands
1. `download`: Download the model from different sources. (meta, huggingface)
2. `list`: Lists all the models available for download, along with the hardware requirements for deploying them.
3. `template`: <TODO: What is a template?>
3. `prompt-format`: Shows llama model message formats.
4. `describe`: Describes all the properties of the model.
### 2.2 Sample Usage
@ -135,7 +159,7 @@ The `llama model` command helps you explore the models interface.
llama model --help
```
<pre style="font-family: monospace;">
usage: llama model [-h] {download,list,template,describe} ...
usage: llama model [-h] {download,list,prompt-format,describe} ...
Work with llama models
@ -143,124 +167,67 @@ options:
-h, --help show this help message and exit
model_subcommands:
{download,list,template,describe}
{download,list,prompt-format,describe}
</pre>
You can use the `describe` command to learn more about a model:
```
llama model describe -m Meta-Llama3.1-8B-Instruct
llama model describe -m Llama3.2-3B-Instruct
```
### 2.3 Describe
<pre style="font-family: monospace;">
+-----------------------------+---------------------------------------+
| Model | Meta- |
| | Llama3.1-8B-Instruct |
+-----------------------------+---------------------------------------+
| HuggingFace ID | meta-llama/Meta-Llama-3.1-8B-Instruct |
+-----------------------------+---------------------------------------+
| Description | Llama 3.1 8b instruct model |
+-----------------------------+---------------------------------------+
| Context Length | 128K tokens |
+-----------------------------+---------------------------------------+
| Weights format | bf16 |
+-----------------------------+---------------------------------------+
| Model params.json | { |
| | "dim": 4096, |
| | "n_layers": 32, |
| | "n_heads": 32, |
| | "n_kv_heads": 8, |
| | "vocab_size": 128256, |
| | "ffn_dim_multiplier": 1.3, |
| | "multiple_of": 1024, |
| | "norm_eps": 1e-05, |
| | "rope_theta": 500000.0, |
| | "use_scaled_rope": true |
| | } |
+-----------------------------+---------------------------------------+
| Recommended sampling params | { |
| | "strategy": "top_p", |
| | "temperature": 1.0, |
| | "top_p": 0.9, |
| | "top_k": 0 |
| | } |
+-----------------------------+---------------------------------------+
+-----------------------------+----------------------------------+
| Model | Llama3.2-3B-Instruct |
+-----------------------------+----------------------------------+
| HuggingFace ID | meta-llama/Llama-3.2-3B-Instruct |
+-----------------------------+----------------------------------+
| Description | Llama 3.2 3b instruct model |
+-----------------------------+----------------------------------+
| Context Length | 128K tokens |
+-----------------------------+----------------------------------+
| Weights format | bf16 |
+-----------------------------+----------------------------------+
| Model params.json | { |
| | "dim": 3072, |
| | "n_layers": 28, |
| | "n_heads": 24, |
| | "n_kv_heads": 8, |
| | "vocab_size": 128256, |
| | "ffn_dim_multiplier": 1.0, |
| | "multiple_of": 256, |
| | "norm_eps": 1e-05, |
| | "rope_theta": 500000.0, |
| | "use_scaled_rope": true |
| | } |
+-----------------------------+----------------------------------+
| Recommended sampling params | { |
| | "strategy": "top_p", |
| | "temperature": 1.0, |
| | "top_p": 0.9, |
| | "top_k": 0 |
| | } |
+-----------------------------+----------------------------------+
</pre>
### 2.4 Template
You can even run `llama model template` to see all of the templates and their tokens:
### 2.4 Prompt Format
You can even run `llama model prompt-format` to see all of the templates and their tokens:
```
llama model template
llama model prompt-format -m Llama3.2-3B-Instruct
```
<p align="center">
<img width="719" alt="image" src="https://github.com/user-attachments/assets/c5332026-8c0b-4edc-b438-ec60cd7ca554">
</p>
<pre style="font-family: monospace;">
+-----------+---------------------------------+
| Role | Template Name |
+-----------+---------------------------------+
| user | user-default |
| assistant | assistant-builtin-tool-call |
| assistant | assistant-custom-tool-call |
| assistant | assistant-default |
| system | system-builtin-and-custom-tools |
| system | system-builtin-tools-only |
| system | system-custom-tools-only |
| system | system-default |
| tool | tool-success |
| tool | tool-failure |
+-----------+---------------------------------+
</pre>
And fetch an example by passing its name to `--name`:
```
llama model template --name tool-success
```
<pre style="font-family: monospace;">
+----------+----------------------------------------------------------------+
| Name | tool-success |
+----------+----------------------------------------------------------------+
| Template | <|start_header_id|>ipython<|end_header_id|> |
| | |
| | completed |
| | [stdout]{"results":["something |
| | something"]}[/stdout]<|eot_id|> |
| | |
+----------+----------------------------------------------------------------+
| Notes | Note ipython header and [stdout] |
+----------+----------------------------------------------------------------+
</pre>
Or:
```
llama model template --name system-builtin-tools-only
```
<pre style="font-family: monospace;">
+----------+--------------------------------------------+
| Name | system-builtin-tools-only |
+----------+--------------------------------------------+
| Template | <|start_header_id|>system<|end_header_id|> |
| | |
| | Environment: ipython |
| | Tools: brave_search, wolfram_alpha |
| | |
| | Cutting Knowledge Date: December 2023 |
| | Today Date: 21 August 2024 |
| | <|eot_id|> |
| | |
+----------+--------------------------------------------+
| Notes | |
+----------+--------------------------------------------+
</pre>
These commands can help you understand the model interface and how prompts / messages are formatted for various scenarios.
You will be shown a Markdown-formatted description of the model interface and how prompts / messages are formatted for various scenarios.
**NOTE**: Outputs in the terminal are color-printed to highlight special tokens.
## Step 3: Building and Configuring Llama Stack Distributions
- Please see our [Getting Started](getting_started.md) guide for details.
- Please see our [Getting Started](getting_started.md) guide for more details on how to build and start a Llama Stack distribution.
### Step 3.1 Build
In the following steps, imagine we'll be working with a `Meta-Llama3.1-8B-Instruct` model. We will name our build `8b-instruct` to help us remember the config. We will then build our distribution (in the form of a Conda environment or Docker image). In this step, we will specify:

BIN docs/dog.jpg (new binary file, 39 KiB; not shown)

docs/getting_started.ipynb (new file, 325 lines; diff suppressed because one or more lines are too long)


@ -1,9 +1,70 @@
# llama-stack
[![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-stack)](https://pypi.org/project/llama-stack/)
[![Discord](https://img.shields.io/discord/1257833999603335178)](https://discord.gg/TZAAYNVtrU)
This repository contains the specifications and implementations of the APIs which are part of the Llama Stack.
The Llama Stack defines and standardizes the building blocks needed to bring generative AI applications to market. These blocks span the entire development lifecycle: from model training and fine-tuning, through product evaluation, to invoking AI agents in production. Beyond definition, we're developing open-source versions and partnering with cloud providers, ensuring developers can assemble AI solutions using consistent, interlocking pieces across platforms. The ultimate goal is to accelerate innovation in the AI space.
The Stack APIs are rapidly improving but still very much a work in progress, and we invite feedback as well as direct contributions.
## APIs
The Llama Stack consists of the following set of APIs:
- Inference
- Safety
- Memory
- Agentic System
- Evaluation
- Post Training
- Synthetic Data Generation
- Reward Scoring
Each API is itself a collection of REST endpoints.
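Since each API is a set of REST endpoints, you can exercise a running distribution with plain HTTP. Below is a rough sketch of what a call could look like; the endpoint path, port, and payload shape are assumptions based on the draft spec, not a guaranteed interface.
```bash
# Hypothetical chat-completion request against a locally running distribution;
# adjust the path and body to match the actual specification.
curl -X POST http://localhost:5000/inference/chat_completion \
  -H 'Content-Type: application/json' \
  -d '{
    "model": "Llama3.2-3B-Instruct",
    "messages": [{"role": "user", "content": "Hello!"}]
  }'
```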
## API Providers
A Provider is what makes the API real -- it supplies the actual implementation backing the API.
As an example, for Inference, the implementation could be backed by open-source options like `[ torch | vLLM | TensorRT ]`.
A provider can also be just a pointer to a remote REST service -- for example, cloud providers or dedicated inference providers could serve these APIs.
## Llama Stack Distribution
A Distribution is where APIs and Providers are assembled together to provide a consistent whole to the end application developer. You can mix-and-match providers -- some could be backed by local code and some could be remote. As a hobbyist, you can serve a small model locally, but choose a cloud provider for a large model. Regardless, the higher-level APIs your app needs to work with don't need to change at all. You can even imagine moving across the server / mobile-device boundary as well, always using the same uniform set of APIs for developing Generative AI applications.
## Installation
You can install this repository as a [package](https://pypi.org/project/llama-stack/) with `pip install llama-stack`.
If you want to install from source:
```bash
mkdir -p ~/local
cd ~/local
git clone git@github.com:meta-llama/llama-stack.git
conda create -n stack python=3.10
conda activate stack
cd llama-stack
$CONDA_PREFIX/bin/pip install -e .
```
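Once installed (either way), a quick sanity check is to invoke the CLI:
```bash
# Verify the `llama` CLI is on your path and can enumerate models
llama --help
llama model list
```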
# Getting Started
The `llama` CLI tool helps you set up and use the Llama toolchain & agentic systems. It should be available on your path after installing the `llama-stack` package.
This guide lets you quickly get started building and running a Llama Stack server in under 5 minutes!
You may also check out this [notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb) to try out our demo scripts.
## Quick Cheatsheet
- A quick three-command flow to build and start a Llama Stack server using our Meta Reference implementation for all API endpoints, with `conda` as the build type (outlined below).
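In outline, the flow looks like this (the name and port are placeholders; each step is interactive and is walked through in detail below):
```bash
llama stack build                       # define a build, e.g. my-local-stack
llama stack configure my-local-stack    # fill in provider configuration
llama stack run my-local-stack --port 5000
```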
@ -12,7 +73,7 @@ This guides allows you to quickly get started with building and running a Llama
```
llama stack build
> Enter an unique name for identifying your Llama Stack build distribution (e.g. my-local-stack): my-local-llama-stack
> Enter an unique name for identifying your Llama Stack build distribution (e.g. my-local-stack): my-local-stack
> Enter the image type you want your distribution to be built with (docker or conda): conda
Llama Stack is composed of several APIs working together. Let's configure the providers (implementations) you want to use for these APIs.
@ -24,47 +85,57 @@ llama stack build
> (Optional) Enter a short description for your Llama Stack distribution:
Build spec configuration saved at ~/.conda/envs/llamastack-my-local-llama-stack/my-local-llama-stack-build.yaml
Build spec configuration saved at ~/.conda/envs/llamastack-my-local-stack/my-local-stack-build.yaml
You can now run `llama stack configure my-local-stack`
```
**`llama stack configure`**
- Run `llama stack configure <name>` with the name you previously defined in the `build` step.
```
llama stack configure my-local-llama-stack
llama stack configure <name>
```
- You will be prompted to enter configurations for your Llama Stack
Configuring APIs to serve...
Enter comma-separated list of APIs to serve:
```
$ llama stack configure my-local-stack
Could not find my-local-stack. Trying conda build name instead...
Configuring API `inference`...
Configuring provider `meta-reference`...
Enter value for model (default: Meta-Llama3.1-8B-Instruct) (required):
=== Configuring provider `meta-reference` for API inference...
Enter value for model (default: Llama3.1-8B-Instruct) (required):
Do you want to configure quantization? (y/n): n
Enter value for torch_seed (optional):
Enter value for max_seq_len (required): 4096
Enter value for max_seq_len (default: 4096) (required):
Enter value for max_batch_size (default: 1) (required):
Configuring API `safety`...
Configuring provider `meta-reference`...
Configuring API `safety`...
=== Configuring provider `meta-reference` for API safety...
Do you want to configure llama_guard_shield? (y/n): n
Do you want to configure prompt_guard_shield? (y/n): n
Configuring API `agents`...
=== Configuring provider `meta-reference` for API agents...
Enter `type` for persistence_store (options: redis, sqlite, postgres) (default: sqlite):
Configuring SqliteKVStoreConfig:
Enter value for namespace (optional):
Enter value for db_path (default: /home/xiyan/.llama/runtime/kvstore.db) (required):
Configuring provider `meta-reference`...
Configuring API `memory`...
=== Configuring provider `meta-reference` for API memory...
> Please enter the supported memory bank type your provider has for memory: vector
Configuring provider `meta-reference`...
Configuring API `telemetry`...
=== Configuring provider `meta-reference` for API telemetry...
Configuring provider `meta-reference`...
> YAML configuration has been written to ~/.llama/builds/conda/my-local-llama-stack-run.yaml.
You can now run `llama stack run my-local-llama-stack --port PORT` or `llama stack run ~/.llama/builds/conda/my-local-llama-stack-run.yaml --port PORT`
> YAML configuration has been written to ~/.llama/builds/conda/my-local-stack-run.yaml.
You can now run `llama stack run my-local-stack --port PORT`
```
**`llama stack run`**
- Run `llama stack run <name>` with the name you have previously defined.
```
llama stack run my-local-llama-stack
llama stack run my-local-stack
...
> initializing model parallel with size 1
@ -126,7 +197,7 @@ llama stack build
Running the command above will let you fill in the configuration to build your Llama Stack distribution; you will see output like the following.
```
> Enter an unique name for identifying your Llama Stack build distribution (e.g. my-local-stack): my-local-llama-stack
> Enter an unique name for identifying your Llama Stack build distribution (e.g. my-local-stack): 8b-instruct
> Enter the image type you want your distribution to be built with (docker or conda): conda
Llama Stack is composed of several APIs working together. Let's configure the providers (implementations) you want to use for these APIs.
@ -138,9 +209,14 @@ Running the command above will allow you to fill in the configuration to build y
> (Optional) Enter a short description for your Llama Stack distribution:
Build spec configuration saved at ~/.conda/envs/llamastack-my-local-llama-stack/my-local-llama-stack-build.yaml
Build spec configuration saved at ~/.conda/envs/llamastack-my-local-llama-stack/8b-instruct-build.yaml
```
**Ollama (optional)**
If you plan to use Ollama for inference, you'll need to install the server [via these instructions](https://ollama.com/download).
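As a sketch (the model tag is an assumption; check Ollama's model library for the exact tags), you can pre-pull a Llama model once the Ollama server is installed:
```bash
# Pre-pull a Llama model for Ollama-backed inference (tag assumed)
ollama pull llama3.1:8b
```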
#### Building from templates
- To build with alternative API providers, we provide distribution templates to help you get started with a distribution backed by different providers.
@ -236,7 +312,7 @@ llama stack configure [ <name> | <docker-image-name> | <path/to/name.build.yaml>
- Run `docker images` to check the list of available images on your machine.
```
$ llama stack configure ~/.llama/distributions/conda/8b-instruct-build.yaml
$ llama stack configure 8b-instruct
Configuring API: inference (meta-reference)
Enter value for model (existing: Meta-Llama3.1-8B-Instruct) (required):
@ -284,13 +360,13 @@ Note that all configurations as well as models are stored in `~/.llama`
Now, let's start the Llama Stack Distribution Server. You will need the YAML configuration file that was written out at the end of the `llama stack configure` step.
```
llama stack run ~/.llama/builds/conda/8b-instruct-run.yaml
llama stack run 8b-instruct
```
You should see the Llama Stack server start and print the APIs it supports:
```
$ llama stack run ~/.llama/builds/local/conda/8b-instruct.yaml
$ llama stack run 8b-instruct
> initializing model parallel with size 1
> initializing ddp with size 1
@ -357,4 +433,4 @@ Similarly you can test safety (if you configured llama-guard and/or prompt-guard
python -m llama_stack.apis.safety.client localhost 5000
```
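For completeness, a matching smoke test for inference could look like this; the module path mirrors the safety client above and is an assumption, as is the port:
```bash
python -m llama_stack.apis.inference.client localhost 5000
```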
You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/sdk_examples) repo.
You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps) repo.


@ -21,7 +21,7 @@
"info": {
"title": "[DRAFT] Llama Stack Specification",
"version": "0.0.1",
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-23 10:56:42.866760"
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-23 16:58:41.469308"
},
"servers": [
{
@ -2027,10 +2027,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -2053,6 +2063,35 @@
"tool_calls"
]
},
"ImageMedia": {
"type": "object",
"properties": {
"image": {
"oneOf": [
{
"type": "object",
"properties": {
"format": {
"type": "string"
},
"format_description": {
"type": "string"
}
},
"additionalProperties": false,
"title": "This class represents an image object. To create"
},
{
"$ref": "#/components/schemas/URL"
}
]
}
},
"additionalProperties": false,
"required": [
"image"
]
},
"SamplingParams": {
"type": "object",
"properties": {
@ -2115,10 +2154,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -2267,6 +2316,28 @@
"required": {
"type": "boolean",
"default": true
},
"default": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"additionalProperties": false,
@ -2278,7 +2349,8 @@
"type": "string",
"enum": [
"json",
"function_tag"
"function_tag",
"python_list"
],
"title": "This Enum refers to the prompt format for calling custom / zero shot tools",
"description": "`json` --\n Refers to the json format for calling tools.\n The json format takes the form like\n {\n \"type\": \"function\",\n \"function\" : {\n \"name\": \"function_name\",\n \"description\": \"function_description\",\n \"parameters\": {...}\n }\n }\n\n`function_tag` --\n This is an example of how you could define\n your own user defined format for making tool calls.\n The function_tag format looks like this,\n <function=function_name>(parameters)</function>\n\nThe detailed prompts for each of these formats are added to llama cli"
@ -2309,10 +2381,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -2326,6 +2408,11 @@
"content"
]
},
"URL": {
"type": "string",
"format": "uri",
"pattern": "^(https?://|file://|data:)"
},
"UserMessage": {
"type": "object",
"properties": {
@ -2339,10 +2426,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -2352,10 +2449,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -2455,10 +2562,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -2714,10 +2831,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -3298,11 +3425,6 @@
"engine"
]
},
"URL": {
"type": "string",
"format": "uri",
"pattern": "^(https?://|file://|data:)"
},
"WolframAlphaToolDefinition": {
"type": "object",
"properties": {
@ -3396,10 +3518,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
},
{
@ -3731,10 +3863,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -3888,10 +4030,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -4316,10 +4468,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -4515,10 +4677,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
},
{
@ -5407,10 +5579,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -5460,10 +5642,20 @@
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
},
{
"type": "array",
"items": {
"type": "string"
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/ImageMedia"
}
]
}
}
]
@ -6027,32 +6219,32 @@
}
],
"tags": [
{
"name": "Inference"
},
{
"name": "Shields"
},
{
"name": "Models"
},
{
"name": "MemoryBanks"
},
{
"name": "SyntheticDataGeneration"
"name": "BatchInference"
},
{
"name": "RewardScoring"
},
{
"name": "PostTraining"
"name": "SyntheticDataGeneration"
},
{
"name": "Agents"
},
{
"name": "MemoryBanks"
},
{
"name": "Safety"
},
{
"name": "Evaluations"
"name": "Models"
},
{
"name": "Inference"
},
{
"name": "Memory"
@ -6061,14 +6253,14 @@
"name": "Telemetry"
},
{
"name": "Agents"
},
{
"name": "BatchInference"
"name": "PostTraining"
},
{
"name": "Datasets"
},
{
"name": "Evaluations"
},
{
"name": "BuiltinTool",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/BuiltinTool\" />"
@ -6077,6 +6269,10 @@
"name": "CompletionMessage",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/CompletionMessage\" />"
},
{
"name": "ImageMedia",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/ImageMedia\" />"
},
{
"name": "SamplingParams",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/SamplingParams\" />"
@ -6117,6 +6313,10 @@
"name": "ToolResponseMessage",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/ToolResponseMessage\" />"
},
{
"name": "URL",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/URL\" />"
},
{
"name": "UserMessage",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/UserMessage\" />"
@ -6221,10 +6421,6 @@
"name": "SearchToolDefinition",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/SearchToolDefinition\" />"
},
{
"name": "URL",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/URL\" />"
},
{
"name": "WolframAlphaToolDefinition",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/WolframAlphaToolDefinition\" />"
@ -6661,6 +6857,7 @@
"FunctionCallToolDefinition",
"GetAgentsSessionRequest",
"GetDocumentsRequest",
"ImageMedia",
"InferenceStep",
"InsertDocumentsRequest",
"LogEventRequest",


@ -210,8 +210,11 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
- $ref: '#/components/schemas/URL'
mime_type:
@ -273,8 +276,11 @@ components:
items:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
type: array
logprobs:
@ -441,8 +447,11 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
role:
const: assistant
@ -466,8 +475,11 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
logprobs:
additionalProperties: false
@ -742,8 +754,11 @@ components:
items:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
type: array
model:
@ -893,6 +908,23 @@ components:
required:
- document_ids
type: object
ImageMedia:
additionalProperties: false
properties:
image:
oneOf:
- additionalProperties: false
properties:
format:
type: string
format_description:
type: string
title: This class represents an image object. To create
type: object
- $ref: '#/components/schemas/URL'
required:
- image
type: object
InferenceStep:
additionalProperties: false
properties:
@ -1041,8 +1073,11 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
- $ref: '#/components/schemas/URL'
document_id:
@ -1108,8 +1143,11 @@ components:
inserted_context:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
memory_bank_ids:
items:
@ -1545,8 +1583,11 @@ components:
query:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
required:
- bank_id
@ -1562,8 +1603,11 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
document_id:
type: string
@ -2067,8 +2111,11 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
role:
const: system
@ -2203,6 +2250,14 @@ components:
ToolParamDefinition:
additionalProperties: false
properties:
default:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description:
type: string
param_type:
@ -2225,6 +2280,7 @@ components:
enum:
- json
- function_tag
- python_list
title: This Enum refers to the prompt format for calling custom / zero shot
tools
type: string
@ -2236,8 +2292,11 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
tool_name:
oneOf:
@ -2256,8 +2315,11 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
role:
const: ipython
@ -2451,14 +2513,20 @@ components:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
context:
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
- items:
type: string
oneOf:
- type: string
- $ref: '#/components/schemas/ImageMedia'
type: array
role:
const: user
@ -2501,7 +2569,7 @@ info:
description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\
\ draft and subject to change.\n Generated at 2024-09-23 10:56:42.866760"
\ draft and subject to change.\n Generated at 2024-09-23 16:58:41.469308"
title: '[DRAFT] Llama Stack Specification'
version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@ -3739,25 +3807,27 @@ security:
servers:
- url: http://any-hosted-llama-stack.com
tags:
- name: Inference
- name: Shields
- name: Models
- name: MemoryBanks
- name: SyntheticDataGeneration
- name: BatchInference
- name: RewardScoring
- name: PostTraining
- name: SyntheticDataGeneration
- name: Agents
- name: MemoryBanks
- name: Safety
- name: Evaluations
- name: Models
- name: Inference
- name: Memory
- name: Telemetry
- name: Agents
- name: BatchInference
- name: PostTraining
- name: Datasets
- name: Evaluations
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
name: BuiltinTool
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
/>
name: CompletionMessage
- description: <SchemaDefinition schemaRef="#/components/schemas/ImageMedia" />
name: ImageMedia
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingParams" />
name: SamplingParams
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingStrategy"
@ -3790,6 +3860,8 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolResponseMessage"
/>
name: ToolResponseMessage
- description: <SchemaDefinition schemaRef="#/components/schemas/URL" />
name: URL
- description: <SchemaDefinition schemaRef="#/components/schemas/UserMessage" />
name: UserMessage
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
@ -3876,8 +3948,6 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/SearchToolDefinition"
/>
name: SearchToolDefinition
- description: <SchemaDefinition schemaRef="#/components/schemas/URL" />
name: URL
- description: <SchemaDefinition schemaRef="#/components/schemas/WolframAlphaToolDefinition"
/>
name: WolframAlphaToolDefinition
@ -4233,6 +4303,7 @@ x-tagGroups:
- FunctionCallToolDefinition
- GetAgentsSessionRequest
- GetDocumentsRequest
- ImageMedia
- InferenceStep
- InsertDocumentsRequest
- LogEventRequest