Support for Llama3.2 models and Swift SDK (#98)
parent 95abbf576b
commit 56aed59eb4
56 changed files with 3745 additions and 630 deletions
@@ -37,50 +37,74 @@ llama model list
You should see a table like this:

<pre style="font-family: monospace;">
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Model Descriptor                      | HuggingFace Repo                            | Context Length | Hardware Requirements      |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-8B                      | meta-llama/Meta-Llama-3.1-8B                | 128K           | 1 GPU, each >= 20GB VRAM   |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-70B                     | meta-llama/Meta-Llama-3.1-70B               | 128K           | 8 GPUs, each >= 20GB VRAM  |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-405B:bf16-mp8           |                                             | 128K           | 8 GPUs, each >= 120GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-405B                    | meta-llama/Meta-Llama-3.1-405B-FP8          | 128K           | 8 GPUs, each >= 70GB VRAM  |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-405B:bf16-mp16          | meta-llama/Meta-Llama-3.1-405B              | 128K           | 16 GPUs, each >= 70GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-8B-Instruct             | meta-llama/Meta-Llama-3.1-8B-Instruct       | 128K           | 1 GPU, each >= 20GB VRAM   |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-70B-Instruct            | meta-llama/Meta-Llama-3.1-70B-Instruct      | 128K           | 8 GPUs, each >= 20GB VRAM  |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-405B-Instruct:bf16-mp8  |                                             | 128K           | 8 GPUs, each >= 120GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-405B-Instruct           | meta-llama/Meta-Llama-3.1-405B-Instruct-FP8 | 128K           | 8 GPUs, each >= 70GB VRAM  |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Meta-Llama3.1-405B-Instruct:bf16-mp16 | meta-llama/Meta-Llama-3.1-405B-Instruct     | 128K           | 16 GPUs, each >= 70GB VRAM |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Llama-Guard-3-8B                      | meta-llama/Llama-Guard-3-8B                 | 128K           | 1 GPU, each >= 20GB VRAM   |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Llama-Guard-3-8B:int8-mp1             | meta-llama/Llama-Guard-3-8B-INT8            | 128K           | 1 GPU, each >= 10GB VRAM   |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
| Prompt-Guard-86M                      | meta-llama/Prompt-Guard-86M                 | 128K           | 1 GPU, each >= 1GB VRAM    |
+---------------------------------------+---------------------------------------------+----------------+----------------------------+
+----------------------------------+------------------------------------------+----------------+
| Model Descriptor                 | HuggingFace Repo                         | Context Length |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-8B                      | meta-llama/Llama-3.1-8B                  | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-70B                     | meta-llama/Llama-3.1-70B                 | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B:bf16-mp8           | meta-llama/Llama-3.1-405B                | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B                    | meta-llama/Llama-3.1-405B-FP8            | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B:bf16-mp16          | meta-llama/Llama-3.1-405B                | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-8B-Instruct             | meta-llama/Llama-3.1-8B-Instruct         | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-70B-Instruct            | meta-llama/Llama-3.1-70B-Instruct        | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B-Instruct:bf16-mp8  | meta-llama/Llama-3.1-405B-Instruct       | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B-Instruct           | meta-llama/Llama-3.1-405B-Instruct-FP8   | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B-Instruct:bf16-mp16 | meta-llama/Llama-3.1-405B-Instruct       | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-1B                      | meta-llama/Llama-3.2-1B                  | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-3B                      | meta-llama/Llama-3.2-3B                  | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-11B-Vision              | meta-llama/Llama-3.2-11B-Vision          | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-90B-Vision              | meta-llama/Llama-3.2-90B-Vision          | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-1B-Instruct             | meta-llama/Llama-3.2-1B-Instruct         | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-3B-Instruct             | meta-llama/Llama-3.2-3B-Instruct         | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-11B-Vision-Instruct     | meta-llama/Llama-3.2-11B-Vision-Instruct | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.2-90B-Vision-Instruct     | meta-llama/Llama-3.2-90B-Vision-Instruct | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama-Guard-3-11B-Vision         | meta-llama/Llama-Guard-3-11B-Vision      | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama-Guard-3-1B:int4-mp1        | meta-llama/Llama-Guard-3-1B-INT4         | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama-Guard-3-1B                 | meta-llama/Llama-Guard-3-1B              | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama-Guard-3-8B                 | meta-llama/Llama-Guard-3-8B              | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama-Guard-3-8B:int8-mp1        | meta-llama/Llama-Guard-3-8B-INT8         | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Prompt-Guard-86M                 | meta-llama/Prompt-Guard-86M              | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama-Guard-2-8B                 | meta-llama/Llama-Guard-2-8B              | 4K             |
+----------------------------------+------------------------------------------+----------------+
</pre>

To download models, you can use the `llama download` command.

#### Downloading from [Meta](https://llama.meta.com/llama-downloads/)

Here is an example download command to get the 8B/70B Instruct model. You will need META_URL, which can be obtained from [here](https://llama.meta.com/docs/getting_the_models/meta/)
Here is an example download command to get the 3B-Instruct/11B-Vision-Instruct model. You will need META_URL, which can be obtained from [here](https://llama.meta.com/docs/getting_the_models/meta/)

Download the required checkpoints using the following commands:
```bash
# download the 8B model, this can be run on a single GPU
llama download --source meta --model-id Meta-Llama3.1-8B-Instruct --meta-url META_URL
llama download --source meta --model-id Llama3.2-3B-Instruct --meta-url META_URL

# you can also get the 70B model, this will require 8 GPUs however
llama download --source meta --model-id Meta-Llama3.1-70B-Instruct --meta-url META_URL
llama download --source meta --model-id Llama3.2-11B-Vision-Instruct --meta-url META_URL

# llama-agents have safety enabled by default. For this, you will need
# safety models -- Llama-Guard and Prompt-Guard
@@ -124,7 +148,7 @@ The `llama model` command helps you explore the model’s interface.
### 2.1 Subcommands
1. `download`: Download the model from different sources (meta, huggingface).
2. `list`: Lists all the models available for download, with the hardware requirements needed to deploy them.
3. `template`: <TODO: What is a template?>
3. `prompt-format`: Show llama model message formats.
4. `describe`: Describes all the properties of the model.

### 2.2 Sample Usage
@@ -135,7 +159,7 @@ The `llama model` command helps you explore the model’s interface.
llama model --help
```
<pre style="font-family: monospace;">
usage: llama model [-h] {download,list,template,describe} ...
usage: llama model [-h] {download,list,prompt-format,describe} ...

Work with llama models

@@ -143,124 +167,67 @@ options:
  -h, --help            show this help message and exit

model_subcommands:
  {download,list,template,describe}
  {download,list,prompt-format,describe}
</pre>

You can use the describe command to learn more about a model:
```
llama model describe -m Meta-Llama3.1-8B-Instruct
llama model describe -m Llama3.2-3B-Instruct
```
### 2.3 Describe

<pre style="font-family: monospace;">
+-----------------------------+---------------------------------------+
| Model                       | Meta-                                 |
|                             | Llama3.1-8B-Instruct                  |
+-----------------------------+---------------------------------------+
| HuggingFace ID              | meta-llama/Meta-Llama-3.1-8B-Instruct |
+-----------------------------+---------------------------------------+
| Description                 | Llama 3.1 8b instruct model           |
+-----------------------------+---------------------------------------+
| Context Length              | 128K tokens                           |
+-----------------------------+---------------------------------------+
| Weights format              | bf16                                  |
+-----------------------------+---------------------------------------+
| Model params.json           | {                                     |
|                             |     "dim": 4096,                      |
|                             |     "n_layers": 32,                   |
|                             |     "n_heads": 32,                    |
|                             |     "n_kv_heads": 8,                  |
|                             |     "vocab_size": 128256,             |
|                             |     "ffn_dim_multiplier": 1.3,        |
|                             |     "multiple_of": 1024,              |
|                             |     "norm_eps": 1e-05,                |
|                             |     "rope_theta": 500000.0,           |
|                             |     "use_scaled_rope": true           |
|                             | }                                     |
+-----------------------------+---------------------------------------+
| Recommended sampling params | {                                     |
|                             |     "strategy": "top_p",              |
|                             |     "temperature": 1.0,               |
|                             |     "top_p": 0.9,                     |
|                             |     "top_k": 0                        |
|                             | }                                     |
+-----------------------------+---------------------------------------+
+-----------------------------+----------------------------------+
| Model                       | Llama3.2-3B-Instruct             |
+-----------------------------+----------------------------------+
| HuggingFace ID              | meta-llama/Llama-3.2-3B-Instruct |
+-----------------------------+----------------------------------+
| Description                 | Llama 3.2 3b instruct model      |
+-----------------------------+----------------------------------+
| Context Length              | 128K tokens                      |
+-----------------------------+----------------------------------+
| Weights format              | bf16                             |
+-----------------------------+----------------------------------+
| Model params.json           | {                                |
|                             |     "dim": 3072,                 |
|                             |     "n_layers": 28,              |
|                             |     "n_heads": 24,               |
|                             |     "n_kv_heads": 8,             |
|                             |     "vocab_size": 128256,        |
|                             |     "ffn_dim_multiplier": 1.0,   |
|                             |     "multiple_of": 256,          |
|                             |     "norm_eps": 1e-05,           |
|                             |     "rope_theta": 500000.0,      |
|                             |     "use_scaled_rope": true      |
|                             | }                                |
+-----------------------------+----------------------------------+
| Recommended sampling params | {                                |
|                             |     "strategy": "top_p",         |
|                             |     "temperature": 1.0,          |
|                             |     "top_p": 0.9,                |
|                             |     "top_k": 0                   |
|                             | }                                |
+-----------------------------+----------------------------------+
</pre>
### 2.4 Template
You can even run `llama model template` to see all of the templates and their tokens:
### 2.4 Prompt Format
You can even run `llama model prompt-format` to see all of the templates and their tokens:

```
llama model template
llama model prompt-format -m Llama3.2-3B-Instruct
```
<p align="center">
<img width="719" alt="image" src="https://github.com/user-attachments/assets/c5332026-8c0b-4edc-b438-ec60cd7ca554">
</p>

<pre style="font-family: monospace;">
+-----------+---------------------------------+
| Role      | Template Name                   |
+-----------+---------------------------------+
| user      | user-default                    |
| assistant | assistant-builtin-tool-call     |
| assistant | assistant-custom-tool-call      |
| assistant | assistant-default               |
| system    | system-builtin-and-custom-tools |
| system    | system-builtin-tools-only       |
| system    | system-custom-tools-only        |
| system    | system-default                  |
| tool      | tool-success                    |
| tool      | tool-failure                    |
+-----------+---------------------------------+
</pre>

And fetch an example by passing it to `--name`:
```
llama model template --name tool-success
```

<pre style="font-family: monospace;">
+----------+----------------------------------------------------------------+
| Name     | tool-success                                                   |
+----------+----------------------------------------------------------------+
| Template | <|start_header_id|>ipython<|end_header_id|>                    |
|          |                                                                |
|          | completed                                                      |
|          | [stdout]{"results":["something                                 |
|          | something"]}[/stdout]<|eot_id|>                                |
|          |                                                                |
+----------+----------------------------------------------------------------+
| Notes    | Note ipython header and [stdout]                               |
+----------+----------------------------------------------------------------+
</pre>

Or:
```
llama model template --name system-builtin-tools-only
```

<pre style="font-family: monospace;">
+----------+--------------------------------------------+
| Name     | system-builtin-tools-only                  |
+----------+--------------------------------------------+
| Template | <|start_header_id|>system<|end_header_id|> |
|          |                                            |
|          | Environment: ipython                       |
|          | Tools: brave_search, wolfram_alpha         |
|          |                                            |
|          | Cutting Knowledge Date: December 2023      |
|          | Today Date: 21 August 2024                 |
|          | <|eot_id|>                                 |
|          |                                            |
+----------+--------------------------------------------+
| Notes    |                                            |
+----------+--------------------------------------------+
</pre>

These commands help you understand the model interface and how prompts / messages are formatted for various scenarios.
You will be shown a Markdown formatted description of the model interface and how prompts / messages are formatted for various scenarios.

**NOTE**: Outputs in terminal are color printed to show special tokens.


## Step 3: Building and Configuring Llama Stack Distributions

- Please see our [Getting Started](getting_started.md) guide for details.
- Please see our [Getting Started](getting_started.md) guide for more details on how to build and start a Llama Stack distribution.

### Step 3.1 Build
In the following steps, imagine we'll be working with a `Meta-Llama3.1-8B-Instruct` model. We will name our build `8b-instruct` to help us remember the config. We will start building our distribution (in the form of a Conda environment or Docker image). In this step, we will specify:
BIN docs/dog.jpg (new file, 39 KiB): binary file not shown.
docs/getting_started.ipynb (new file, 325 lines): file diff suppressed because one or more lines are too long.

@@ -1,9 +1,70 @@
# llama-stack

[PyPI version](https://pypi.org/project/llama-stack/)
[Discord](https://discord.gg/TZAAYNVtrU)

This repository contains the specifications and implementations of the APIs which are part of the Llama Stack.

The Llama Stack defines and standardizes the building blocks needed to bring generative AI applications to market. These blocks span the entire development lifecycle: from model training and fine-tuning, through product evaluation, to invoking AI agents in production. Beyond definition, we're developing open-source versions and partnering with cloud providers, ensuring developers can assemble AI solutions using consistent, interlocking pieces across platforms. The ultimate goal is to accelerate innovation in the AI space.

The Stack APIs are rapidly improving, but still very much work in progress and we invite feedback as well as direct contributions.


## APIs

The Llama Stack consists of the following set of APIs:

- Inference
- Safety
- Memory
- Agentic System
- Evaluation
- Post Training
- Synthetic Data Generation
- Reward Scoring

Each of these APIs is itself a collection of REST endpoints.
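
As a rough sketch of what that means in practice, here is one way a client might exercise an Inference endpoint over plain HTTP. The host, port, route, and payload shape below are assumptions for illustration; the generated API spec in this repository is the authoritative reference:

```python
# Sketch of calling a Llama Stack endpoint over REST.
# The /inference/chat_completion route and payload shape are assumptions
# for illustration; consult the generated OpenAPI spec for the contract.
import requests

response = requests.post(
    "http://localhost:5000/inference/chat_completion",
    json={
        "model": "Llama3.2-3B-Instruct",
        "messages": [{"role": "user", "content": "Hello!"}],
        "stream": False,
    },
)
response.raise_for_status()
print(response.json())
```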

## API Providers

A Provider is what makes the API real -- they provide the actual implementation backing the API.

As an example, for Inference, the implementation could be backed by open source libraries like `[ torch | vLLM | TensorRT ]`.

A provider can also be just a pointer to a remote REST service -- for example, cloud providers or dedicated inference providers could serve these APIs.


## Llama Stack Distribution

A Distribution is where APIs and Providers are assembled together to provide a consistent whole to the end application developer. You can mix-and-match providers -- some could be backed by local code and some could be remote. As a hobbyist, you can serve a small model locally, but choose a cloud provider for a large model. Regardless, the higher level APIs your app needs to work with don't need to change at all. You can even imagine moving across the server / mobile-device boundary as well, always using the same uniform set of APIs for developing Generative AI applications.


## Installation

You can install this repository as a [package](https://pypi.org/project/llama-stack/) with `pip install llama-stack`

If you want to install from source:

```bash
mkdir -p ~/local
cd ~/local
git clone git@github.com:meta-llama/llama-stack.git

conda create -n stack python=3.10
conda activate stack

cd llama-stack
$CONDA_PREFIX/bin/pip install -e .
```

# Getting Started

The `llama` CLI tool helps you set up and use the Llama toolchain & agentic systems. It should be available on your path after installing the `llama-stack` package.

This guide allows you to quickly get started with building and running a Llama Stack server in < 5 minutes!

You may also check out this [notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb) for trying out our demo scripts.

## Quick Cheatsheet
- Quick 3 line command to build and start a Llama Stack server using our Meta Reference implementation for all API endpoints, with `conda` as the build type.

@@ -12,7 +73,7 @@ This guide allows you to quickly get started with building and running a Llama
```
llama stack build

> Enter an unique name for identifying your Llama Stack build distribution (e.g. my-local-stack): my-local-llama-stack
> Enter an unique name for identifying your Llama Stack build distribution (e.g. my-local-stack): my-local-stack
> Enter the image type you want your distribution to be built with (docker or conda): conda

Llama Stack is composed of several APIs working together. Let's configure the providers (implementations) you want to use for these APIs.

@@ -24,47 +85,57 @@ llama stack build

> (Optional) Enter a short description for your Llama Stack distribution:

Build spec configuration saved at ~/.conda/envs/llamastack-my-local-llama-stack/my-local-llama-stack-build.yaml
Build spec configuration saved at ~/.conda/envs/llamastack-my-local-stack/my-local-stack-build.yaml
You can now run `llama stack configure my-local-stack`
```
**`llama stack configure`**
- Run `llama stack configure <name>` with the name you previously defined in the `build` step.
```
llama stack configure my-local-llama-stack
llama stack configure <name>
```
- You will be prompted to enter configurations for your Llama Stack

Configuring APIs to serve...
Enter comma-separated list of APIs to serve:
```
$ llama stack configure my-local-stack

Could not find my-local-stack. Trying conda build name instead...
Configuring API `inference`...

Configuring provider `meta-reference`...
Enter value for model (default: Meta-Llama3.1-8B-Instruct) (required):
=== Configuring provider `meta-reference` for API inference...
Enter value for model (default: Llama3.1-8B-Instruct) (required):
Do you want to configure quantization? (y/n): n
Enter value for torch_seed (optional):
Enter value for max_seq_len (required): 4096
Enter value for max_seq_len (default: 4096) (required):
Enter value for max_batch_size (default: 1) (required):
Configuring API `safety`...

Configuring provider `meta-reference`...
Configuring API `safety`...
=== Configuring provider `meta-reference` for API safety...
Do you want to configure llama_guard_shield? (y/n): n
Do you want to configure prompt_guard_shield? (y/n): n

Configuring API `agents`...
=== Configuring provider `meta-reference` for API agents...
Enter `type` for persistence_store (options: redis, sqlite, postgres) (default: sqlite):

Configuring SqliteKVStoreConfig:
Enter value for namespace (optional):
Enter value for db_path (default: /home/xiyan/.llama/runtime/kvstore.db) (required):

Configuring provider `meta-reference`...
Configuring API `memory`...
=== Configuring provider `meta-reference` for API memory...
> Please enter the supported memory bank type your provider has for memory: vector

Configuring provider `meta-reference`...
Configuring API `telemetry`...
=== Configuring provider `meta-reference` for API telemetry...

Configuring provider `meta-reference`...
> YAML configuration has been written to ~/.llama/builds/conda/my-local-llama-stack-run.yaml.
You can now run `llama stack run my-local-llama-stack --port PORT` or `llama stack run ~/.llama/builds/conda/my-local-llama-stack-run.yaml --port PORT
> YAML configuration has been written to ~/.llama/builds/conda/my-local-stack-run.yaml.
You can now run `llama stack run my-local-stack --port PORT`
```

**`llama stack run`**
- Run `llama stack run <name>` with the name you previously defined.
```
llama stack run my-local-llama-stack
llama stack run my-local-stack

...
> initializing model parallel with size 1
@@ -126,7 +197,7 @@ llama stack build
Running the command above will allow you to fill in the configuration to build your Llama Stack distribution; you will see the following output.

```
> Enter an unique name for identifying your Llama Stack build distribution (e.g. my-local-stack): my-local-llama-stack
> Enter an unique name for identifying your Llama Stack build distribution (e.g. my-local-stack): 8b-instruct
> Enter the image type you want your distribution to be built with (docker or conda): conda

Llama Stack is composed of several APIs working together. Let's configure the providers (implementations) you want to use for these APIs.

@@ -138,9 +209,14 @@ Running the command above will allow you to fill in the configuration to build y

> (Optional) Enter a short description for your Llama Stack distribution:

Build spec configuration saved at ~/.conda/envs/llamastack-my-local-llama-stack/my-local-llama-stack-build.yaml
Build spec configuration saved at ~/.conda/envs/llamastack-my-local-llama-stack/8b-instruct-build.yaml
```

**Ollama (optional)**

If you plan to use Ollama for inference, you'll need to install the server [via these instructions](https://ollama.com/download).


#### Building from templates
- To build from alternative API providers, we provide distribution templates for users to get started building a distribution backed by different providers.
@@ -236,7 +312,7 @@ llama stack configure [ <name> | <docker-image-name> | <path/to/name.build.yaml>
- Run `docker images` to check the list of available images on your machine.

```
$ llama stack configure ~/.llama/distributions/conda/8b-instruct-build.yaml
$ llama stack configure 8b-instruct

Configuring API: inference (meta-reference)
Enter value for model (existing: Meta-Llama3.1-8B-Instruct) (required):
@@ -284,13 +360,13 @@ Note that all configurations as well as models are stored in `~/.llama`
Now, let's start the Llama Stack Distribution Server. You will need the YAML configuration file which was written out at the end by the `llama stack configure` step.

```
llama stack run ~/.llama/builds/conda/8b-instruct-run.yaml
llama stack run 8b-instruct
```

You should see the Llama Stack server start and print the APIs that it supports

```
$ llama stack run ~/.llama/builds/local/conda/8b-instruct.yaml
$ llama stack run 8b-instruct

> initializing model parallel with size 1
> initializing ddp with size 1
@@ -357,4 +433,4 @@ Similarly you can test safety (if you configured llama-guard and/or prompt-guard
python -m llama_stack.apis.safety.client localhost 5000
```
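
The test clients above are thin wrappers over the same REST API. A rough, hypothetical equivalent of the safety check in plain Python might look like the sketch below; the route name and payload shape are assumptions, so consult the generated spec for the exact contract:

```python
# Hypothetical sketch of what the safety test client does under the hood.
# The /safety/run_shield route, shield identifier, and payload shape are
# assumptions -- see the generated llama-stack spec for the real contract.
import requests

resp = requests.post(
    "http://localhost:5000/safety/run_shield",
    json={
        "shield_type": "llama_guard",
        "messages": [{"role": "user", "content": "How do I make a cake?"}],
    },
)
resp.raise_for_status()
print(resp.json())  # a verdict indicating whether the content was flagged
```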

You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/sdk_examples) repo.
You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps) repo.
@@ -21,7 +21,7 @@
  "info": {
    "title": "[DRAFT] Llama Stack Specification",
    "version": "0.0.1",
    "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-23 10:56:42.866760"
    "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-23 16:58:41.469308"
  },
  "servers": [
    {
@@ -2027,10 +2027,20 @@
            {
              "type": "string"
            },
            {
              "$ref": "#/components/schemas/ImageMedia"
            },
            {
              "type": "array",
              "items": {
                "type": "string"
                "oneOf": [
                  {
                    "type": "string"
                  },
                  {
                    "$ref": "#/components/schemas/ImageMedia"
                  }
                ]
              }
            }
          ]
@@ -2053,6 +2063,35 @@
        "tool_calls"
      ]
    },
    "ImageMedia": {
      "type": "object",
      "properties": {
        "image": {
          "oneOf": [
            {
              "type": "object",
              "properties": {
                "format": {
                  "type": "string"
                },
                "format_description": {
                  "type": "string"
                }
              },
              "additionalProperties": false,
              "title": "This class represents an image object. To create"
            },
            {
              "$ref": "#/components/schemas/URL"
            }
          ]
        }
      },
      "additionalProperties": false,
      "required": [
        "image"
      ]
    },
    "SamplingParams": {
      "type": "object",
      "properties": {
@@ -2115,10 +2154,20 @@
            {
              "type": "string"
            },
            {
              "$ref": "#/components/schemas/ImageMedia"
            },
            {
              "type": "array",
              "items": {
                "type": "string"
                "oneOf": [
                  {
                    "type": "string"
                  },
                  {
                    "$ref": "#/components/schemas/ImageMedia"
                  }
                ]
              }
            }
          ]
@@ -2267,6 +2316,28 @@
      "required": {
        "type": "boolean",
        "default": true
      },
      "default": {
        "oneOf": [
          {
            "type": "null"
          },
          {
            "type": "boolean"
          },
          {
            "type": "number"
          },
          {
            "type": "string"
          },
          {
            "type": "array"
          },
          {
            "type": "object"
          }
        ]
      }
    },
    "additionalProperties": false,
@@ -2278,7 +2349,8 @@
        "type": "string",
        "enum": [
          "json",
          "function_tag"
          "function_tag",
          "python_list"
        ],
        "title": "This Enum refers to the prompt format for calling custom / zero shot tools",
        "description": "`json` --\n    Refers to the json format for calling tools.\n    The json format takes the form like\n    {\n        \"type\": \"function\",\n        \"function\" : {\n            \"name\": \"function_name\",\n            \"description\": \"function_description\",\n            \"parameters\": {...}\n        }\n    }\n\n`function_tag` --\n    This is an example of how you could define\n    your own user defined format for making tool calls.\n    The function_tag format looks like this,\n    <function=function_name>(parameters)</function>\n\nThe detailed prompts for each of these formats are added to llama cli"
@@ -2309,10 +2381,20 @@
            {
              "type": "string"
            },
            {
              "$ref": "#/components/schemas/ImageMedia"
            },
            {
              "type": "array",
              "items": {
                "type": "string"
                "oneOf": [
                  {
                    "type": "string"
                  },
                  {
                    "$ref": "#/components/schemas/ImageMedia"
                  }
                ]
              }
            }
          ]
@@ -2326,6 +2408,11 @@
        "content"
      ]
    },
    "URL": {
      "type": "string",
      "format": "uri",
      "pattern": "^(https?://|file://|data:)"
    },
    "UserMessage": {
      "type": "object",
      "properties": {
@@ -2339,10 +2426,20 @@
            {
              "type": "string"
            },
            {
              "$ref": "#/components/schemas/ImageMedia"
            },
            {
              "type": "array",
              "items": {
                "type": "string"
                "oneOf": [
                  {
                    "type": "string"
                  },
                  {
                    "$ref": "#/components/schemas/ImageMedia"
                  }
                ]
              }
            }
          ]
@@ -2352,10 +2449,20 @@
            {
              "type": "string"
            },
            {
              "$ref": "#/components/schemas/ImageMedia"
            },
            {
              "type": "array",
              "items": {
                "type": "string"
                "oneOf": [
                  {
                    "type": "string"
                  },
                  {
                    "$ref": "#/components/schemas/ImageMedia"
                  }
                ]
              }
            }
          ]
@@ -2455,10 +2562,20 @@
            {
              "type": "string"
            },
            {
              "$ref": "#/components/schemas/ImageMedia"
            },
            {
              "type": "array",
              "items": {
                "type": "string"
                "oneOf": [
                  {
                    "type": "string"
                  },
                  {
                    "$ref": "#/components/schemas/ImageMedia"
                  }
                ]
              }
            }
          ]
@@ -2714,10 +2831,20 @@
            {
              "type": "string"
            },
            {
              "$ref": "#/components/schemas/ImageMedia"
            },
            {
              "type": "array",
              "items": {
                "type": "string"
                "oneOf": [
                  {
                    "type": "string"
                  },
                  {
                    "$ref": "#/components/schemas/ImageMedia"
                  }
                ]
              }
            }
          ]
@@ -3298,11 +3425,6 @@
        "engine"
      ]
    },
    "URL": {
      "type": "string",
      "format": "uri",
      "pattern": "^(https?://|file://|data:)"
    },
    "WolframAlphaToolDefinition": {
      "type": "object",
      "properties": {
@@ -3396,10 +3518,20 @@
            {
              "type": "string"
            },
            {
              "$ref": "#/components/schemas/ImageMedia"
            },
            {
              "type": "array",
              "items": {
                "type": "string"
                "oneOf": [
                  {
                    "type": "string"
                  },
                  {
                    "$ref": "#/components/schemas/ImageMedia"
                  }
                ]
              }
            },
            {
@@ -3731,10 +3863,20 @@
            {
              "type": "string"
            },
            {
              "$ref": "#/components/schemas/ImageMedia"
            },
            {
              "type": "array",
              "items": {
                "type": "string"
                "oneOf": [
                  {
                    "type": "string"
                  },
                  {
                    "$ref": "#/components/schemas/ImageMedia"
                  }
                ]
              }
            }
          ]
@@ -3888,10 +4030,20 @@
            {
              "type": "string"
            },
            {
              "$ref": "#/components/schemas/ImageMedia"
            },
            {
              "type": "array",
              "items": {
                "type": "string"
                "oneOf": [
                  {
                    "type": "string"
                  },
                  {
                    "$ref": "#/components/schemas/ImageMedia"
                  }
                ]
              }
            }
          ]
@@ -4316,10 +4468,20 @@
            {
              "type": "string"
            },
            {
              "$ref": "#/components/schemas/ImageMedia"
            },
            {
              "type": "array",
              "items": {
                "type": "string"
                "oneOf": [
                  {
                    "type": "string"
                  },
                  {
                    "$ref": "#/components/schemas/ImageMedia"
                  }
                ]
              }
            }
          ]
@@ -4515,10 +4677,20 @@
            {
              "type": "string"
            },
            {
              "$ref": "#/components/schemas/ImageMedia"
            },
            {
              "type": "array",
              "items": {
                "type": "string"
                "oneOf": [
                  {
                    "type": "string"
                  },
                  {
                    "$ref": "#/components/schemas/ImageMedia"
                  }
                ]
              }
            },
            {
@@ -5407,10 +5579,20 @@
            {
              "type": "string"
            },
            {
              "$ref": "#/components/schemas/ImageMedia"
            },
            {
              "type": "array",
              "items": {
                "type": "string"
                "oneOf": [
                  {
                    "type": "string"
                  },
                  {
                    "$ref": "#/components/schemas/ImageMedia"
                  }
                ]
              }
            }
          ]
@@ -5460,10 +5642,20 @@
            {
              "type": "string"
            },
            {
              "$ref": "#/components/schemas/ImageMedia"
            },
            {
              "type": "array",
              "items": {
                "type": "string"
                "oneOf": [
                  {
                    "type": "string"
                  },
                  {
                    "$ref": "#/components/schemas/ImageMedia"
                  }
                ]
              }
            }
          ]
@@ -6027,32 +6219,32 @@
    }
  ],
  "tags": [
    {
      "name": "Inference"
    },
    {
      "name": "Shields"
    },
    {
      "name": "Models"
    },
    {
      "name": "MemoryBanks"
    },
    {
      "name": "SyntheticDataGeneration"
      "name": "BatchInference"
    },
    {
      "name": "RewardScoring"
    },
    {
      "name": "PostTraining"
      "name": "SyntheticDataGeneration"
    },
    {
      "name": "Agents"
    },
    {
      "name": "MemoryBanks"
    },
    {
      "name": "Safety"
    },
    {
      "name": "Evaluations"
      "name": "Models"
    },
    {
      "name": "Inference"
    },
    {
      "name": "Memory"
@@ -6061,14 +6253,14 @@
      "name": "Telemetry"
    },
    {
      "name": "Agents"
    },
    {
      "name": "BatchInference"
      "name": "PostTraining"
    },
    {
      "name": "Datasets"
    },
    {
      "name": "Evaluations"
    },
    {
      "name": "BuiltinTool",
      "description": "<SchemaDefinition schemaRef=\"#/components/schemas/BuiltinTool\" />"
@@ -6077,6 +6269,10 @@
      "name": "CompletionMessage",
      "description": "<SchemaDefinition schemaRef=\"#/components/schemas/CompletionMessage\" />"
    },
    {
      "name": "ImageMedia",
      "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ImageMedia\" />"
    },
    {
      "name": "SamplingParams",
      "description": "<SchemaDefinition schemaRef=\"#/components/schemas/SamplingParams\" />"
@@ -6117,6 +6313,10 @@
      "name": "ToolResponseMessage",
      "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ToolResponseMessage\" />"
    },
    {
      "name": "URL",
      "description": "<SchemaDefinition schemaRef=\"#/components/schemas/URL\" />"
    },
    {
      "name": "UserMessage",
      "description": "<SchemaDefinition schemaRef=\"#/components/schemas/UserMessage\" />"
@@ -6221,10 +6421,6 @@
      "name": "SearchToolDefinition",
      "description": "<SchemaDefinition schemaRef=\"#/components/schemas/SearchToolDefinition\" />"
    },
    {
      "name": "URL",
      "description": "<SchemaDefinition schemaRef=\"#/components/schemas/URL\" />"
    },
    {
      "name": "WolframAlphaToolDefinition",
      "description": "<SchemaDefinition schemaRef=\"#/components/schemas/WolframAlphaToolDefinition\" />"
@@ -6661,6 +6857,7 @@
        "FunctionCallToolDefinition",
        "GetAgentsSessionRequest",
        "GetDocumentsRequest",
        "ImageMedia",
        "InferenceStep",
        "InsertDocumentsRequest",
        "LogEventRequest",
@@ -210,8 +210,11 @@ components:
      content:
        oneOf:
        - type: string
        - $ref: '#/components/schemas/ImageMedia'
        - items:
            type: string
            oneOf:
            - type: string
            - $ref: '#/components/schemas/ImageMedia'
          type: array
        - $ref: '#/components/schemas/URL'
      mime_type:
@@ -273,8 +276,11 @@ components:
      items:
        oneOf:
        - type: string
        - $ref: '#/components/schemas/ImageMedia'
        - items:
            type: string
            oneOf:
            - type: string
            - $ref: '#/components/schemas/ImageMedia'
          type: array
      type: array
      logprobs:
@@ -441,8 +447,11 @@ components:
      content:
        oneOf:
        - type: string
        - $ref: '#/components/schemas/ImageMedia'
        - items:
            type: string
            oneOf:
            - type: string
            - $ref: '#/components/schemas/ImageMedia'
          type: array
      role:
        const: assistant
@@ -466,8 +475,11 @@ components:
      content:
        oneOf:
        - type: string
        - $ref: '#/components/schemas/ImageMedia'
        - items:
            type: string
            oneOf:
            - type: string
            - $ref: '#/components/schemas/ImageMedia'
          type: array
      logprobs:
    additionalProperties: false
@@ -742,8 +754,11 @@ components:
      items:
        oneOf:
        - type: string
        - $ref: '#/components/schemas/ImageMedia'
        - items:
            type: string
            oneOf:
            - type: string
            - $ref: '#/components/schemas/ImageMedia'
          type: array
      type: array
      model:
@@ -893,6 +908,23 @@ components:
    required:
    - document_ids
    type: object
  ImageMedia:
    additionalProperties: false
    properties:
      image:
        oneOf:
        - additionalProperties: false
          properties:
            format:
              type: string
            format_description:
              type: string
          title: This class represents an image object. To create
          type: object
        - $ref: '#/components/schemas/URL'
    required:
    - image
    type: object
  InferenceStep:
    additionalProperties: false
    properties:
@@ -1041,8 +1073,11 @@ components:
      content:
        oneOf:
        - type: string
        - $ref: '#/components/schemas/ImageMedia'
        - items:
            type: string
            oneOf:
            - type: string
            - $ref: '#/components/schemas/ImageMedia'
          type: array
        - $ref: '#/components/schemas/URL'
      document_id:
@@ -1108,8 +1143,11 @@ components:
      inserted_context:
        oneOf:
        - type: string
        - $ref: '#/components/schemas/ImageMedia'
        - items:
            type: string
            oneOf:
            - type: string
            - $ref: '#/components/schemas/ImageMedia'
          type: array
      memory_bank_ids:
        items:
@@ -1545,8 +1583,11 @@ components:
      query:
        oneOf:
        - type: string
        - $ref: '#/components/schemas/ImageMedia'
        - items:
            type: string
            oneOf:
            - type: string
            - $ref: '#/components/schemas/ImageMedia'
          type: array
    required:
    - bank_id
@@ -1562,8 +1603,11 @@ components:
      content:
        oneOf:
        - type: string
        - $ref: '#/components/schemas/ImageMedia'
        - items:
            type: string
            oneOf:
            - type: string
            - $ref: '#/components/schemas/ImageMedia'
          type: array
      document_id:
        type: string
@@ -2067,8 +2111,11 @@ components:
      content:
        oneOf:
        - type: string
        - $ref: '#/components/schemas/ImageMedia'
        - items:
            type: string
            oneOf:
            - type: string
            - $ref: '#/components/schemas/ImageMedia'
          type: array
      role:
        const: system
@@ -2203,6 +2250,14 @@ components:
  ToolParamDefinition:
    additionalProperties: false
    properties:
      default:
        oneOf:
        - type: 'null'
        - type: boolean
        - type: number
        - type: string
        - type: array
        - type: object
      description:
        type: string
      param_type:
@@ -2225,6 +2280,7 @@ components:
      enum:
      - json
      - function_tag
      - python_list
      title: This Enum refers to the prompt format for calling custom / zero shot
        tools
      type: string
@@ -2236,8 +2292,11 @@ components:
      content:
        oneOf:
        - type: string
        - $ref: '#/components/schemas/ImageMedia'
        - items:
            type: string
            oneOf:
            - type: string
            - $ref: '#/components/schemas/ImageMedia'
          type: array
      tool_name:
        oneOf:
@@ -2256,8 +2315,11 @@ components:
      content:
        oneOf:
        - type: string
        - $ref: '#/components/schemas/ImageMedia'
        - items:
            type: string
            oneOf:
            - type: string
            - $ref: '#/components/schemas/ImageMedia'
          type: array
      role:
        const: ipython
@@ -2451,14 +2513,20 @@ components:
      content:
        oneOf:
        - type: string
        - $ref: '#/components/schemas/ImageMedia'
        - items:
            type: string
            oneOf:
            - type: string
            - $ref: '#/components/schemas/ImageMedia'
          type: array
      context:
        oneOf:
        - type: string
        - $ref: '#/components/schemas/ImageMedia'
        - items:
            type: string
            oneOf:
            - type: string
            - $ref: '#/components/schemas/ImageMedia'
          type: array
      role:
        const: user
@@ -2501,7 +2569,7 @@ info:
  description: "This is the specification of the llama stack that provides\n \
    \ a set of endpoints and their corresponding interfaces that are tailored\
    \ to\n best leverage Llama Models. The specification is still in\
    \ draft and subject to change.\n Generated at 2024-09-23 10:56:42.866760"
    \ draft and subject to change.\n Generated at 2024-09-23 16:58:41.469308"
  title: '[DRAFT] Llama Stack Specification'
  version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -3739,25 +3807,27 @@ security:
servers:
- url: http://any-hosted-llama-stack.com
tags:
- name: Inference
- name: Shields
- name: Models
- name: MemoryBanks
- name: SyntheticDataGeneration
- name: BatchInference
- name: RewardScoring
- name: PostTraining
- name: SyntheticDataGeneration
- name: Agents
- name: MemoryBanks
- name: Safety
- name: Evaluations
- name: Models
- name: Inference
- name: Memory
- name: Telemetry
- name: Agents
- name: BatchInference
- name: PostTraining
- name: Datasets
- name: Evaluations
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
  name: BuiltinTool
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
    />
  name: CompletionMessage
- description: <SchemaDefinition schemaRef="#/components/schemas/ImageMedia" />
  name: ImageMedia
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingParams" />
  name: SamplingParams
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingStrategy"
@@ -3790,6 +3860,8 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolResponseMessage"
    />
  name: ToolResponseMessage
- description: <SchemaDefinition schemaRef="#/components/schemas/URL" />
  name: URL
- description: <SchemaDefinition schemaRef="#/components/schemas/UserMessage" />
  name: UserMessage
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
@@ -3876,8 +3948,6 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/SearchToolDefinition"
    />
  name: SearchToolDefinition
- description: <SchemaDefinition schemaRef="#/components/schemas/URL" />
  name: URL
- description: <SchemaDefinition schemaRef="#/components/schemas/WolframAlphaToolDefinition"
    />
  name: WolframAlphaToolDefinition
@@ -4233,6 +4303,7 @@ x-tagGroups:
    - FunctionCallToolDefinition
    - GetAgentsSessionRequest
    - GetDocumentsRequest
    - ImageMedia
    - InferenceStep
    - InsertDocumentsRequest
    - LogEventRequest