Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-01 16:24:44 +00:00)

Commit 8a35dc8b0e: Merge branch 'main' into add-nvidia-inference-adapter

28 changed files with 429 additions and 478 deletions
CHANGELOG.md (new file, 35 additions)

@@ -0,0 +1,35 @@
+# Changelog
+
+## 0.0.53
+
+### Added
+- Resource-oriented design for models, shields, memory banks, datasets and eval tasks
+- Persistence for registered objects with distribution
+- Ability to persist memory banks created for FAISS
+- PostgreSQL KVStore implementation
+- Environment variable placeholder support in run.yaml files
+- Comprehensive Zero-to-Hero notebooks and quickstart guides
+- Support for quantized models in Ollama
+- Vision model support for Together, Fireworks, Meta-Reference, Ollama, and vLLM
+- Bedrock distribution with safety shields support
+- Evals API with task registration and scoring functions
+- MMLU and SimpleQA benchmark scoring functions
+- Huggingface dataset provider integration for benchmarks
+- Support for custom dataset registration from local paths
+- Benchmark evaluation CLI tools with visualization tables
+- RAG evaluation scoring functions and metrics
+- Local persistence for datasets and eval tasks
+
+### Changed
+- Split safety into distinct providers (llama-guard, prompt-guard, code-scanner)
+- Changed provider naming convention (`impls` → `inline`, `adapters` → `remote`)
+- Updated API signatures for dataset and eval task registration
+- Restructured folder organization for providers
+- Enhanced Docker build configuration
+- Added version prefixing for REST API routes
+- Enhanced evaluation task registration workflow
+- Improved benchmark evaluation output formatting
+- Restructured evals folder organization for better modularity
+
+### Removed
+- `llama stack configure` command

@@ -1,4 +1,5 @@
 include requirements.txt
+include distributions/dependencies.json
 include llama_stack/distribution/*.sh
 include llama_stack/cli/scripts/*.sh
 include llama_stack/templates/*/*.yaml

distributions/dependencies.json (new file, 171 additions)

@@ -0,0 +1,171 @@
+{
+  "together": [
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "together",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "remote-vllm": [
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "fireworks": [
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "fireworks-ai",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "tgi": [
+    "aiohttp",
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "huggingface_hub",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "meta-reference-gpu": [
+    "accelerate",
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "fairscale",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "lm-format-enforcer",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "torch",
+    "torchvision",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "zmq",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "ollama": [
+    "aiohttp",
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "ollama",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ]
+}

@@ -21,7 +21,7 @@
     "info": {
         "title": "[DRAFT] Llama Stack Specification",
         "version": "alpha",
-        "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-11-18 23:37:24.867143"
+        "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-11-19 09:14:01.145131"
     },
     "servers": [
         {

@@ -3400,7 +3400,7 @@ info:
   description: "This is the specification of the llama stack that provides\n \
     \ a set of endpoints and their corresponding interfaces that are tailored\
     \ to\n best leverage Llama Models. The specification is still in\
-    \ draft and subject to change.\n Generated at 2024-11-18 23:37:24.867143"
+    \ draft and subject to change.\n Generated at 2024-11-19 09:14:01.145131"
   title: '[DRAFT] Llama Stack Specification'
   version: alpha
 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema

@@ -25,8 +25,8 @@ The following models are available by default:
 - `meta-llama/Llama-3.1-8B-Instruct (fireworks/llama-v3p1-8b-instruct)`
 - `meta-llama/Llama-3.1-70B-Instruct (fireworks/llama-v3p1-70b-instruct)`
 - `meta-llama/Llama-3.1-405B-Instruct-FP8 (fireworks/llama-v3p1-405b-instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (fireworks/llama-v3p2-1b-instruct)`
+- `meta-llama/Llama-3.2-1B-Instruct (fireworks/llama-v3p2-1b-instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (fireworks/llama-v3p2-3b-instruct)`
+- `meta-llama/Llama-3.2-3B-Instruct (fireworks/llama-v3p2-3b-instruct)`
 - `meta-llama/Llama-3.2-11B-Vision-Instruct (fireworks/llama-v3p2-11b-vision-instruct)`
 - `meta-llama/Llama-3.2-90B-Vision-Instruct (fireworks/llama-v3p2-90b-vision-instruct)`
 - `meta-llama/Llama-Guard-3-8B (fireworks/llama-guard-3-8b)`

@@ -23,5 +23,6 @@ tgi
 dell-tgi
 together
 fireworks
+remote-vllm
 bedrock
 ```

@@ -53,9 +53,9 @@ Please see our pages in detail for the types of distributions we offer:
 3. [On-device Distribution](./distributions/ondevice_distro/index.md): If you want to run Llama Stack inference on your iOS / Android device.
 
-### Quick Start Commands
+### Table of Contents
 
-Once you have decided on the inference provider and distribution to use, use the following quick start commands to get started.
+Once you have decided on the inference provider and distribution to use, use the following guides to get started.
 
 ##### 1.0 Prerequisite
 
@@ -109,421 +109,33 @@ Access to Single-Node CPU with Fireworks hosted endpoint via API_KEY from [firew
 
 ##### 1.1. Start the distribution
 
-**(Option 1) Via Docker**
 ::::{tab-set}
 
 :::{tab-item} meta-reference-gpu
-```
+- [Start Meta Reference GPU Distribution](https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/meta-reference-gpu.html)
-$ cd llama-stack/distributions/meta-reference-gpu && docker compose up
-```
-
-This will download and start running a pre-built Docker container. Alternatively, you may use the following commands:
-
-```
-docker run -it -p 5000:5000 -v ~/.llama:/root/.llama -v ./run.yaml:/root/my-run.yaml --gpus=all distribution-meta-reference-gpu --yaml_config /root/my-run.yaml
-```
 :::
 
 :::{tab-item} vLLM
-```
+- [Start vLLM Distribution](https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/remote-vllm.html)
-$ cd llama-stack/distributions/remote-vllm && docker compose up
-```
-
-The script will first start up vLLM server on port 8000, then start up Llama Stack distribution server hooking up to it for inference. You should see the following outputs --
-```
-<TO BE FILLED>
-```
-
-To kill the server
-```
-docker compose down
-```
 :::
 
 :::{tab-item} tgi
-```
+- [Start TGI Distribution](https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/tgi.html)
-$ cd llama-stack/distributions/tgi && docker compose up
-```
-
-The script will first start up TGI server, then start up Llama Stack distribution server hooking up to the remote TGI provider for inference. You should see the following outputs --
-```
-[text-generation-inference] | 2024-10-15T18:56:33.810397Z INFO text_generation_router::server: router/src/server.rs:1813: Using config Some(Llama)
-[text-generation-inference] | 2024-10-15T18:56:33.810448Z WARN text_generation_router::server: router/src/server.rs:1960: Invalid hostname, defaulting to 0.0.0.0
-[text-generation-inference] | 2024-10-15T18:56:33.864143Z INFO text_generation_router::server: router/src/server.rs:2353: Connected
-INFO:     Started server process [1]
-INFO:     Waiting for application startup.
-INFO:     Application startup complete.
-INFO:     Uvicorn running on http://[::]:5000 (Press CTRL+C to quit)
-```
-
-To kill the server
-```
-docker compose down
-```
-:::
-
-:::{tab-item} ollama
-```
-$ cd llama-stack/distributions/ollama && docker compose up
-
-# OR
-
-$ cd llama-stack/distributions/ollama-gpu && docker compose up
-```
-
-You will see outputs similar to following ---
-```
-[ollama] | [GIN] 2024/10/18 - 21:19:41 | 200 | 226.841µs | ::1 | GET "/api/ps"
-[ollama] | [GIN] 2024/10/18 - 21:19:42 | 200 | 60.908µs | ::1 | GET "/api/ps"
-INFO:     Started server process [1]
-INFO:     Waiting for application startup.
-INFO:     Application startup complete.
-INFO:     Uvicorn running on http://[::]:5000 (Press CTRL+C to quit)
-[llamastack] | Resolved 12 providers
-[llamastack] | inner-inference => ollama0
-[llamastack] | models => __routing_table__
-[llamastack] | inference => __autorouted__
-```
-
-To kill the server
-```
-docker compose down
-```
-:::
-
-:::{tab-item} fireworks
-```
-$ cd llama-stack/distributions/fireworks && docker compose up
-```
-
-Make sure your `run.yaml` file has the inference provider pointing to the correct Fireworks URL server endpoint. E.g.
-```
-inference:
-  - provider_id: fireworks
-    provider_type: remote::fireworks
-    config:
-      url: https://api.fireworks.ai/inference
-      api_key: <optional api key>
-```
-:::
-
-:::{tab-item} together
-```
-$ cd distributions/together && docker compose up
-```
-
-Make sure your `run.yaml` file has the inference provider pointing to the correct Together URL server endpoint. E.g.
-```
-inference:
-  - provider_id: together
-    provider_type: remote::together
-    config:
-      url: https://api.together.xyz/v1
-      api_key: <optional api key>
-```
-:::
-
-::::
-
-**(Option 2) Via Conda**
-
-::::{tab-set}
-
-:::{tab-item} meta-reference-gpu
-1. Install the `llama` CLI. See [CLI Reference](https://llama-stack.readthedocs.io/en/latest/cli_reference/index.html)
-
-2. Build the `meta-reference-gpu` distribution
-
-```
-$ llama stack build --template meta-reference-gpu --image-type conda
-```
-
-3. Start running distribution
-```
-$ llama stack run ~/.llama/distributions/llamastack-meta-reference-gpu/meta-reference-gpu-run.yaml
-```
-
-Note: If you wish to use pgvector or chromadb as memory provider. You may need to update generated `run.yaml` file to point to the desired memory provider. See [Memory Providers](https://llama-stack.readthedocs.io/en/latest/api_providers/memory_api.html) for more details. Or comment out the pgvector or chromadb memory provider in `run.yaml` file to use the default inline memory provider, keeping only the following section:
-```
-memory:
-  - provider_id: faiss-0
-    provider_type: faiss
-    config:
-      kvstore:
-        namespace: null
-        type: sqlite
-        db_path: ~/.llama/runtime/faiss_store.db
-```
-
-:::
-
-:::{tab-item} tgi
-1. Install the `llama` CLI. See [CLI Reference](https://llama-stack.readthedocs.io/en/latest/cli_reference/index.html)
-
-2. Build the `tgi` distribution
-
-```bash
-llama stack build --template tgi --image-type conda
-```
-
-3. Start a TGI server endpoint
-
-4. Make sure in your `run.yaml` file, your `conda_env` is pointing to the conda environment and inference provider is pointing to the correct TGI server endpoint. E.g.
-```
-conda_env: llamastack-tgi
-...
-inference:
-  - provider_id: tgi0
-    provider_type: remote::tgi
-    config:
-      url: http://127.0.0.1:5009
-```
-
-5. Start Llama Stack server
-```bash
-$ llama stack run ~/.llama/distributions/llamastack-tgi/tgi-run.yaml
-```
-
-Note: If you wish to use pgvector or chromadb as memory provider. You may need to update generated `run.yaml` file to point to the desired memory provider. See [Memory Providers](https://llama-stack.readthedocs.io/en/latest/api_providers/memory_api.html) for more details. Or comment out the pgvector or chromadb memory provider in `run.yaml` file to use the default inline memory provider, keeping only the following section:
-```
-memory:
-  - provider_id: faiss-0
-    provider_type: faiss
-    config:
-      kvstore:
-        namespace: null
-        type: sqlite
-        db_path: ~/.llama/runtime/faiss_store.db
-```
 :::
 
 :::{tab-item} ollama
+- [Start Ollama Distribution](https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/ollama.html)
-If you wish to separately spin up a Ollama server, and connect with Llama Stack, you may use the following commands.
-
-#### Start Ollama server.
-- Please check the [Ollama Documentations](https://github.com/ollama/ollama) for more details.
-
-**Via Docker**
-```
-docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
-```
-
-**Via CLI**
-```
-ollama run <model_id>
-```
-
-#### Start Llama Stack server pointing to Ollama server
-
-Make sure your `run.yaml` file has the inference provider pointing to the correct Ollama endpoint. E.g.
-```
-conda_env: llamastack-ollama
-...
-inference:
-  - provider_id: ollama0
-    provider_type: remote::ollama
-    config:
-      url: http://127.0.0.1:11434
-```
-
-```
-llama stack build --template ollama --image-type conda
-llama stack run ~/.llama/distributions/llamastack-ollama/ollama-run.yaml
-```
-
-Note: If you wish to use pgvector or chromadb as memory provider. You may need to update generated `run.yaml` file to point to the desired memory provider. See [Memory Providers](https://llama-stack.readthedocs.io/en/latest/api_providers/memory_api.html) for more details. Or comment out the pgvector or chromadb memory provider in `run.yaml` file to use the default inline memory provider, keeping only the following section:
-```
-memory:
-  - provider_id: faiss-0
-    provider_type: faiss
-    config:
-      kvstore:
-        namespace: null
-        type: sqlite
-        db_path: ~/.llama/runtime/faiss_store.db
-```
-
-:::
-
-:::{tab-item} fireworks
-
-```bash
-llama stack build --template fireworks --image-type conda
-# -- modify run.yaml to a valid Fireworks server endpoint
-llama stack run ./run.yaml
-```
-
-Make sure your `run.yaml` file has the inference provider pointing to the correct Fireworks URL server endpoint. E.g.
-```
-conda_env: llamastack-fireworks
-...
-inference:
-  - provider_id: fireworks
-    provider_type: remote::fireworks
-    config:
-      url: https://api.fireworks.ai/inference
-      api_key: <optional api key>
-```
 :::
 
 :::{tab-item} together
+- [Start Together Distribution](https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/together.html)
-```bash
-llama stack build --template together --image-type conda
-# -- modify run.yaml to a valid Together server endpoint
-llama stack run ~/.llama/distributions/llamastack-together/together-run.yaml
-```
-
-Make sure your `run.yaml` file has the inference provider pointing to the correct Together URL server endpoint. E.g.
-```
-conda_env: llamastack-together
-...
-inference:
-  - provider_id: together
-    provider_type: remote::together
-    config:
-      url: https://api.together.xyz/v1
-      api_key: <optional api key>
-```
-:::
-
-::::
-
-##### 1.2 (Optional) Update Model Serving Configuration
-::::{tab-set}
-
-:::{tab-item} meta-reference-gpu
-You may change the `config.model` in `run.yaml` to update the model currently being served by the distribution. Make sure you have the model checkpoint downloaded in your `~/.llama`.
-```
-inference:
-  - provider_id: meta0
-    provider_type: inline::meta-reference
-    config:
-      model: Llama3.2-11B-Vision-Instruct
-      quantization: null
-      torch_seed: null
-      max_seq_len: 4096
-      max_batch_size: 1
-```
-
-Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
-:::
-
-:::{tab-item} tgi
-To serve a new model with `tgi`, change the docker command flag `--model-id <model-to-serve>`.
-
-This can be done by edit the `command` args in `compose.yaml`. E.g. Replace "Llama-3.2-1B-Instruct" with the model you want to serve.
-
-```
-command: ["--dtype", "bfloat16", "--usage-stats", "on", "--sharded", "false", "--model-id", "meta-llama/Llama-3.2-1B-Instruct", "--port", "5009", "--cuda-memory-fraction", "0.3"]
-```
-
-or by changing the docker run command's `--model-id` flag
-```
-docker run --rm -it -v $HOME/.cache/huggingface:/data -p 5009:5009 --gpus all ghcr.io/huggingface/text-generation-inference:latest --dtype bfloat16 --usage-stats on --sharded false --model-id meta-llama/Llama-3.2-1B-Instruct --port 5009
-```
-
-Make sure your `run.yaml` file has the inference provider pointing to the TGI server endpoint serving your model.
-```
-inference:
-  - provider_id: tgi0
-    provider_type: remote::tgi
-    config:
-      url: http://127.0.0.1:5009
-```
-```
-
-Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
-:::
-
-:::{tab-item} ollama
-You can use ollama for managing model downloads.
-
-```
-ollama pull llama3.1:8b-instruct-fp16
-ollama pull llama3.1:70b-instruct-fp16
-```
-
-> Please check the [OLLAMA_SUPPORTED_MODELS](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers.remote/inference/ollama/ollama.py) for the supported Ollama models.
-
-To serve a new model with `ollama`
-```
-ollama run <model_name>
-```
-
-To make sure that the model is being served correctly, run `ollama ps` to get a list of models being served by ollama.
-```
-$ ollama ps
-
-NAME                         ID              SIZE     PROCESSOR    UNTIL
-llama3.1:8b-instruct-fp16    4aacac419454    17 GB    100% GPU     4 minutes from now
-```
-
-To verify that the model served by ollama is correctly connected to Llama Stack server
-```
-$ llama-stack-client models list
-+----------------------+----------------------+---------------+-----------------------------------------------+
-| identifier           | llama_model          | provider_id   | metadata                                      |
-+======================+======================+===============+===============================================+
-| Llama3.1-8B-Instruct | Llama3.1-8B-Instruct | ollama0       | {'ollama_model': 'llama3.1:8b-instruct-fp16'} |
-+----------------------+----------------------+---------------+-----------------------------------------------+
-```
-:::
-
-:::{tab-item} together
-Use `llama-stack-client models list` to check the available models served by together.
-
-```
-$ llama-stack-client models list
-+------------------------------+------------------------------+---------------+------------+
-| identifier                   | llama_model                  | provider_id   | metadata   |
-+==============================+==============================+===============+============+
-| Llama3.1-8B-Instruct         | Llama3.1-8B-Instruct         | together0     | {}         |
-+------------------------------+------------------------------+---------------+------------+
-| Llama3.1-70B-Instruct        | Llama3.1-70B-Instruct        | together0     | {}         |
-+------------------------------+------------------------------+---------------+------------+
-| Llama3.1-405B-Instruct       | Llama3.1-405B-Instruct       | together0     | {}         |
-+------------------------------+------------------------------+---------------+------------+
-| Llama3.2-3B-Instruct         | Llama3.2-3B-Instruct         | together0     | {}         |
-+------------------------------+------------------------------+---------------+------------+
-| Llama3.2-11B-Vision-Instruct | Llama3.2-11B-Vision-Instruct | together0     | {}         |
-+------------------------------+------------------------------+---------------+------------+
-| Llama3.2-90B-Vision-Instruct | Llama3.2-90B-Vision-Instruct | together0     | {}         |
-+------------------------------+------------------------------+---------------+------------+
-```
 :::
 
 :::{tab-item} fireworks
-Use `llama-stack-client models list` to check the available models served by Fireworks.
+- [Start Fireworks Distribution](https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/fireworks.html)
-```
-$ llama-stack-client models list
-+------------------------------+------------------------------+---------------+------------+
-| identifier                   | llama_model                  | provider_id   | metadata   |
-+==============================+==============================+===============+============+
-| Llama3.1-8B-Instruct         | Llama3.1-8B-Instruct         | fireworks0    | {}         |
-+------------------------------+------------------------------+---------------+------------+
-| Llama3.1-70B-Instruct        | Llama3.1-70B-Instruct        | fireworks0    | {}         |
-+------------------------------+------------------------------+---------------+------------+
-| Llama3.1-405B-Instruct       | Llama3.1-405B-Instruct       | fireworks0    | {}         |
-+------------------------------+------------------------------+---------------+------------+
-| Llama3.2-1B-Instruct         | Llama3.2-1B-Instruct         | fireworks0    | {}         |
-+------------------------------+------------------------------+---------------+------------+
-| Llama3.2-3B-Instruct         | Llama3.2-3B-Instruct         | fireworks0    | {}         |
-+------------------------------+------------------------------+---------------+------------+
-| Llama3.2-11B-Vision-Instruct | Llama3.2-11B-Vision-Instruct | fireworks0    | {}         |
-+------------------------------+------------------------------+---------------+------------+
-| Llama3.2-90B-Vision-Instruct | Llama3.2-90B-Vision-Instruct | fireworks0    | {}         |
-+------------------------------+------------------------------+---------------+------------+
-```
 :::
 
 ::::
 
 
 ##### Troubleshooting
 - If you encounter any issues, search through our [GitHub Issues](https://github.com/meta-llama/llama-stack/issues), or file an new issue.
 - Use `--port <PORT>` flag to use a different port number. For docker run, update the `-p <PORT>:<PORT>` flag.

@@ -535,10 +147,10 @@ $ llama-stack-client models list
 Once the server is set up, we can test it with a client to verify it's working correctly. The following command will send a chat completion request to the server's `/inference/chat_completion` API:
 
 ```bash
-$ curl http://localhost:5000/inference/chat_completion \
+$ curl http://localhost:5000/alpha/inference/chat-completion \
 -H "Content-Type: application/json" \
 -d '{
-    "model_id": "Llama3.1-8B-Instruct",
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
     "messages": [
         {"role": "system", "content": "You are a helpful assistant."},
         {"role": "user", "content": "Write me a 2 sentence poem about the moon"}

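For reference, a minimal Python sketch of the same request against the new versioned route. This is an illustration, not part of the commit; it assumes a Llama Stack server listening on localhost:5000 and uses `httpx`, which the client code elsewhere in this diff already imports.

```python
import httpx

# Illustration only: the same chat-completion request as the curl command
# above, assuming a Llama Stack server on localhost:5000.
response = httpx.post(
    "http://localhost:5000/alpha/inference/chat-completion",
    json={
        "model_id": "meta-llama/Llama-3.1-8B-Instruct",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Write me a 2 sentence poem about the moon"},
        ],
    },
    timeout=60.0,
)
print(response.status_code)
print(response.text)
```
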
@@ -22,14 +22,22 @@ If you're looking for more specific topics like tool calling or agent setup, we
    - Download and unzip `Ollama-darwin.zip`.
    - Run the `Ollama` application.
 
-2. **Download the Ollama CLI**:
+1. **Download the Ollama CLI**:
    - Ensure you have the `ollama` command line tool by downloading and installing it from the same website.
 
-3. **Verify Installation**:
+1. **Start ollama server**:
+   - Open the terminal and run:
+     ```
+     ollama serve
+     ```
+
+1. **Run the model**:
    - Open the terminal and run:
      ```bash
-     ollama run llama3.2:1b
+     ollama run llama3.2:3b-instruct-fp16
      ```
+   **Note**: The supported models for llama stack for now is listed in [here](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L43)
 
 ---

@@ -84,6 +92,8 @@ If you're looking for more specific topics like tool calling or agent setup, we
    ```bash
    llama stack run /path/to/your/distro/llamastack-ollama/ollama-run.yaml --port 5050
    ```
+   Note:
+   1. Everytime you run a new model with `ollama run`, you will need to restart the llama stack. Otherwise it won't see the new model
 
    The server will start and listen on `http://localhost:5050`.

@@ -97,7 +107,7 @@ After setting up the server, open a new terminal window and verify it's working
 curl http://localhost:5050/inference/chat_completion \
 -H "Content-Type: application/json" \
 -d '{
-    "model": "llama3.2:1b",
+    "model": "Llama3.2-3B-Instruct",
     "messages": [
         {"role": "system", "content": "You are a helpful assistant."},
         {"role": "user", "content": "Write me a 2-sentence poem about the moon"}

@@ -106,6 +116,8 @@ curl http://localhost:5050/inference/chat_completion \
 }'
 ```
 
+You can check the available models with the command `llama-stack-client models list`.
+
 **Expected Output:**
 ```json
 {

@@ -54,6 +54,7 @@ class ToolDefinitionCommon(BaseModel):
 class SearchEngineType(Enum):
     bing = "bing"
     brave = "brave"
+    tavily = "tavily"
 
 
 @json_schema_type

@@ -380,6 +380,7 @@ def _hf_download(
 
 def _meta_download(
     model: "Model",
+    model_id: str,
     meta_url: str,
     info: "LlamaDownloadInfo",
     max_concurrent_downloads: int,

@@ -405,8 +406,15 @@ def _meta_download(
     downloader = ParallelDownloader(max_concurrent_downloads=max_concurrent_downloads)
     asyncio.run(downloader.download_all(tasks))
 
-    print(f"\nSuccessfully downloaded model to {output_dir}")
-    cprint(f"\nMD5 Checksums are at: {output_dir / 'checklist.chk'}", "white")
+    cprint(f"\nSuccessfully downloaded model to {output_dir}", "green")
+    cprint(
+        f"\nView MD5 checksum files at: {output_dir / 'checklist.chk'}",
+        "white",
+    )
+    cprint(
+        f"\n[Optionally] To run MD5 checksums, use the following command: llama model verify-download --model-id {model_id}",
+        "yellow",
+    )
 
 
 class ModelEntry(BaseModel):

@@ -512,7 +520,7 @@ def run_download_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser):
             )
             if "llamameta.net" not in meta_url:
                 parser.error("Invalid Meta URL provided")
-        _meta_download(model, meta_url, info, args.max_parallel)
+        _meta_download(model, model_id, meta_url, info, args.max_parallel)
 
     except Exception as e:
         parser.error(f"Download failed: {str(e)}")

@@ -9,6 +9,7 @@
 LLAMA_MODELS_DIR=${LLAMA_MODELS_DIR:-}
 LLAMA_STACK_DIR=${LLAMA_STACK_DIR:-}
 TEST_PYPI_VERSION=${TEST_PYPI_VERSION:-}
+BUILD_PLATFORM=${BUILD_PLATFORM:-}
 
 if [ "$#" -lt 4 ]; then
   echo "Usage: $0 <build_name> <docker_base> <pip_dependencies> [<special_pip_deps>]" >&2

@@ -96,7 +97,7 @@ else
   add_to_docker "RUN pip install fastapi libcst"
   add_to_docker <<EOF
 RUN pip install --no-cache --extra-index-url https://test.pypi.org/simple/ \
-  llama-models==$TEST_PYPI_VERSION llama-stack==$TEST_PYPI_VERSION
+  llama-models==$TEST_PYPI_VERSION llama-stack-client==$TEST_PYPI_VERSION llama-stack==$TEST_PYPI_VERSION
 EOF
 else
   add_to_docker "RUN pip install --no-cache llama-stack"

@@ -116,7 +117,6 @@ RUN pip install --no-cache $models_mount
 EOF
 fi
 
-
 add_to_docker <<EOF
 
 # This would be good in production but for debugging flexibility lets not add it right now

@@ -158,7 +158,9 @@ image_tag="$image_name:$version_tag"
 
 # Detect platform architecture
 ARCH=$(uname -m)
-if [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then
+if [ -n "$BUILD_PLATFORM" ]; then
+  PLATFORM="--platform $BUILD_PLATFORM"
+elif [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then
   PLATFORM="--platform linux/arm64"
 elif [ "$ARCH" = "x86_64" ]; then
   PLATFORM="--platform linux/amd64"

@@ -15,6 +15,8 @@ import httpx
 from pydantic import BaseModel, parse_obj_as
 from termcolor import cprint
 
+from llama_stack.apis.version import LLAMA_STACK_API_VERSION
+
 from llama_stack.providers.datatypes import RemoteProviderConfig
 
 _CLIENT_CLASSES = {}

@@ -117,7 +119,7 @@ def create_api_client_class(protocol) -> Type:
                         break
                     kwargs[param.name] = args[i]
 
-            url = f"{self.base_url}{webmethod.route}"
+            url = f"{self.base_url}/{LLAMA_STACK_API_VERSION}/{webmethod.route.lstrip('/')}"
 
             def convert(value):
                 if isinstance(value, list):

@@ -9,6 +9,8 @@ from typing import Dict, List
 
 from pydantic import BaseModel
 
+from llama_stack.apis.version import LLAMA_STACK_API_VERSION
+
 from llama_stack.distribution.resolver import api_protocol_map
 
 from llama_stack.providers.datatypes import Api

@@ -33,7 +35,7 @@ def get_all_api_endpoints() -> Dict[Api, List[ApiEndpoint]]:
                 continue
 
             webmethod = method.__webmethod__
-            route = webmethod.route
+            route = f"/{LLAMA_STACK_API_VERSION}/{webmethod.route.lstrip('/')}"
 
             if webmethod.method == "GET":
                 method = "get"

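Taken together, the client and server changes above prefix every REST route with the API version. A minimal illustrative sketch of the resulting URL construction follows (not the repository's code; it assumes `LLAMA_STACK_API_VERSION` is `"alpha"`, which matches the `/alpha/...` curl example and the `version: alpha` spec fields elsewhere in this diff).

```python
# Minimal sketch of the route-versioning behavior introduced above.
# Assumes the version constant is "alpha"; the real constant lives in
# llama_stack.apis.version.
LLAMA_STACK_API_VERSION = "alpha"

def versioned_route(base_url: str, route: str) -> str:
    """Prefix an unversioned webmethod route with the API version."""
    return f"{base_url}/{LLAMA_STACK_API_VERSION}/{route.lstrip('/')}"

# Both calls produce http://localhost:5000/alpha/inference/chat-completion
print(versioned_route("http://localhost:5000", "/inference/chat-completion"))
print(versioned_route("http://localhost:5000", "inference/chat-completion"))
```
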
@@ -86,10 +86,13 @@ class PhotogenTool(SingleMessageBuiltinTool):
 class SearchTool(SingleMessageBuiltinTool):
     def __init__(self, engine: SearchEngineType, api_key: str, **kwargs) -> None:
         self.api_key = api_key
+        self.engine_type = engine
         if engine == SearchEngineType.bing:
             self.engine = BingSearch(api_key, **kwargs)
         elif engine == SearchEngineType.brave:
             self.engine = BraveSearch(api_key, **kwargs)
+        elif engine == SearchEngineType.tavily:
+            self.engine = TavilySearch(api_key, **kwargs)
         else:
             raise ValueError(f"Unknown search engine: {engine}")
 

@@ -257,6 +260,21 @@ class BraveSearch:
         return {"query": query, "top_k": clean_response}
 
 
+class TavilySearch:
+    def __init__(self, api_key: str) -> None:
+        self.api_key = api_key
+
+    async def search(self, query: str) -> str:
+        response = requests.post(
+            "https://api.tavily.com/search",
+            json={"api_key": self.api_key, "query": query},
+        )
+        return json.dumps(self._clean_tavily_response(response.json()))
+
+    def _clean_tavily_response(self, search_response, top_k=3):
+        return {"query": search_response["query"], "top_k": search_response["results"]}
+
+
 class WolframAlphaTool(SingleMessageBuiltinTool):
     def __init__(self, api_key: str) -> None:
         self.api_key = api_key

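For illustration, a standalone sketch of the HTTP call the new `TavilySearch` helper makes, using the same endpoint, payload, and response fields as the class above. This is not part of the commit; it assumes a valid `TAVILY_SEARCH_API_KEY` in the environment, the same variable the new test further down keys off.

```python
import json
import os

import requests

# Illustration only: mirrors TavilySearch.search() and _clean_tavily_response().
response = requests.post(
    "https://api.tavily.com/search",
    json={"api_key": os.environ["TAVILY_SEARCH_API_KEY"], "query": "Llama Stack"},
)
data = response.json()
print(json.dumps({"query": data["query"], "top_k": data["results"]}, indent=2))
```
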
@@ -50,11 +50,11 @@ MODEL_ALIASES = [
     ),
     build_model_alias(
         "fireworks/llama-v3p2-1b-instruct",
-        CoreModelId.llama3_2_3b_instruct.value,
+        CoreModelId.llama3_2_1b_instruct.value,
     ),
     build_model_alias(
         "fireworks/llama-v3p2-3b-instruct",
-        CoreModelId.llama3_2_11b_vision_instruct.value,
+        CoreModelId.llama3_2_3b_instruct.value,
     ),
     build_model_alias(
         "fireworks/llama-v3p2-11b-vision-instruct",

@@ -214,10 +214,10 @@ class FireworksInferenceAdapter(
 
         async def _to_async_generator():
             if "messages" in params:
-                stream = await self._get_client().chat.completions.acreate(**params)
+                stream = self._get_client().chat.completions.acreate(**params)
             else:
-                stream = self._get_client().completion.create(**params)
-            for chunk in stream:
+                stream = self._get_client().completion.acreate(**params)
+            async for chunk in stream:
                 yield chunk
 
         stream = _to_async_generator()

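The streaming change above drops the `await` and switches to `async for` because `acreate` hands back an async iterator rather than an awaitable. A small self-contained sketch of that pattern follows; the fake client below stands in for the Fireworks SDK and is not the adapter's real code.

```python
import asyncio

async def fake_acreate(**params):
    # Stand-in for an SDK's `acreate`: an async generator of streamed chunks.
    for chunk in ["Hello", ", ", "world"]:
        yield chunk

async def _to_async_generator(params):
    stream = fake_acreate(**params)  # no `await`: this returns an async iterator
    async for chunk in stream:
        yield chunk

async def main():
    async for chunk in _to_async_generator({"prompt": "hi"}):
        print(chunk, end="")
    print()

asyncio.run(main())
```
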
@@ -264,6 +264,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
 
 class TGIAdapter(_HfAdapter):
     async def initialize(self, config: TGIImplConfig) -> None:
+        print(f"Initializing TGI client with url={config.url}")
         self.client = AsyncInferenceClient(model=config.url, token=config.api_token)
         endpoint_info = await self.client.get_endpoint_info()
         self.max_tokens = endpoint_info["max_total_tokens"]

@@ -53,6 +53,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         self.client = None
 
     async def initialize(self) -> None:
+        print(f"Initializing VLLM client with base_url={self.config.url}")
         self.client = OpenAI(base_url=self.config.url, api_key=self.config.api_token)
 
     async def shutdown(self) -> None:

@@ -68,6 +68,73 @@ def query_attachment_messages():
     ]
 
 
+async def create_agent_turn_with_search_tool(
+    agents_stack: Dict[str, object],
+    search_query_messages: List[object],
+    common_params: Dict[str, str],
+    search_tool_definition: SearchToolDefinition,
+) -> None:
+    """
+    Create an agent turn with a search tool.
+
+    Args:
+        agents_stack (Dict[str, object]): The agents stack.
+        search_query_messages (List[object]): The search query messages.
+        common_params (Dict[str, str]): The common parameters.
+        search_tool_definition (SearchToolDefinition): The search tool definition.
+    """
+
+    # Create an agent with the search tool
+    agent_config = AgentConfig(
+        **{
+            **common_params,
+            "tools": [search_tool_definition],
+        }
+    )
+
+    agent_id, session_id = await create_agent_session(
+        agents_stack.impls[Api.agents], agent_config
+    )
+    turn_request = dict(
+        agent_id=agent_id,
+        session_id=session_id,
+        messages=search_query_messages,
+        stream=True,
+    )
+
+    turn_response = [
+        chunk
+        async for chunk in await agents_stack.impls[Api.agents].create_agent_turn(
+            **turn_request
+        )
+    ]
+
+    assert len(turn_response) > 0
+    assert all(
+        isinstance(chunk, AgentTurnResponseStreamChunk) for chunk in turn_response
+    )
+
+    check_event_types(turn_response)
+
+    # Check for tool execution events
+    tool_execution_events = [
+        chunk
+        for chunk in turn_response
+        if isinstance(chunk.event.payload, AgentTurnResponseStepCompletePayload)
+        and chunk.event.payload.step_details.step_type == StepType.tool_execution.value
+    ]
+    assert len(tool_execution_events) > 0, "No tool execution events found"
+
+    # Check the tool execution details
+    tool_execution = tool_execution_events[0].event.payload.step_details
+    assert isinstance(tool_execution, ToolExecutionStep)
+    assert len(tool_execution.tool_calls) > 0
+    assert tool_execution.tool_calls[0].tool_name == BuiltinTool.brave_search
+    assert len(tool_execution.tool_responses) > 0
+
+    check_turn_complete_event(turn_response, session_id, search_query_messages)
+
+
 class TestAgents:
     @pytest.mark.asyncio
     async def test_agent_turns_with_safety(

@@ -215,63 +282,34 @@ class TestAgents:
     async def test_create_agent_turn_with_brave_search(
         self, agents_stack, search_query_messages, common_params
     ):
-        agents_impl = agents_stack.impls[Api.agents]
-
         if "BRAVE_SEARCH_API_KEY" not in os.environ:
             pytest.skip("BRAVE_SEARCH_API_KEY not set, skipping test")
 
-        # Create an agent with Brave search tool
-        agent_config = AgentConfig(
-            **{
-                **common_params,
-                "tools": [
-                    SearchToolDefinition(
-                        type=AgentTool.brave_search.value,
-                        api_key=os.environ["BRAVE_SEARCH_API_KEY"],
-                        engine=SearchEngineType.brave,
-                    )
-                ],
-            }
-        )
-
-        agent_id, session_id = await create_agent_session(agents_impl, agent_config)
-        turn_request = dict(
-            agent_id=agent_id,
-            session_id=session_id,
-            messages=search_query_messages,
-            stream=True,
-        )
-
-        turn_response = [
-            chunk async for chunk in await agents_impl.create_agent_turn(**turn_request)
-        ]
-
-        assert len(turn_response) > 0
-        assert all(
-            isinstance(chunk, AgentTurnResponseStreamChunk) for chunk in turn_response
-        )
-
-        check_event_types(turn_response)
-
-        # Check for tool execution events
-        tool_execution_events = [
-            chunk
-            for chunk in turn_response
-            if isinstance(chunk.event.payload, AgentTurnResponseStepCompletePayload)
-            and chunk.event.payload.step_details.step_type
-            == StepType.tool_execution.value
-        ]
-        assert len(tool_execution_events) > 0, "No tool execution events found"
-
-        # Check the tool execution details
-        tool_execution = tool_execution_events[0].event.payload.step_details
-        assert isinstance(tool_execution, ToolExecutionStep)
-        assert len(tool_execution.tool_calls) > 0
-        assert tool_execution.tool_calls[0].tool_name == BuiltinTool.brave_search
-        assert len(tool_execution.tool_responses) > 0
-
-        check_turn_complete_event(turn_response, session_id, search_query_messages)
+        search_tool_definition = SearchToolDefinition(
+            type=AgentTool.brave_search.value,
+            api_key=os.environ["BRAVE_SEARCH_API_KEY"],
+            engine=SearchEngineType.brave,
+        )
+        await create_agent_turn_with_search_tool(
+            agents_stack, search_query_messages, common_params, search_tool_definition
+        )
+
+    @pytest.mark.asyncio
+    async def test_create_agent_turn_with_tavily_search(
+        self, agents_stack, search_query_messages, common_params
+    ):
+        if "TAVILY_SEARCH_API_KEY" not in os.environ:
+            pytest.skip("TAVILY_SEARCH_API_KEY not set, skipping test")
+
+        search_tool_definition = SearchToolDefinition(
+            type=AgentTool.brave_search.value,  # place holder only
+            api_key=os.environ["TAVILY_SEARCH_API_KEY"],
+            engine=SearchEngineType.tavily,
+        )
+        await create_agent_turn_with_search_tool(
+            agents_stack, search_query_messages, common_params, search_tool_definition
+        )
 
 
 def check_event_types(turn_response):
     event_types = [chunk.event.payload.event_type for chunk in turn_response]

@@ -25,7 +25,11 @@ from .utils import group_chunks
 
 
 def get_expected_stop_reason(model: str):
-    return StopReason.end_of_message if "Llama3.1" in model else StopReason.end_of_turn
+    return (
+        StopReason.end_of_message
+        if ("Llama3.1" in model or "Llama-3.1" in model)
+        else StopReason.end_of_turn
+    )
 
 
 @pytest.fixture

@@ -34,7 +38,7 @@ def common_params(inference_model):
         "tool_choice": ToolChoice.auto,
         "tool_prompt_format": (
             ToolPromptFormat.json
-            if "Llama3.1" in inference_model
+            if ("Llama3.1" in inference_model or "Llama-3.1" in inference_model)
             else ToolPromptFormat.python_list
         ),
     }

@@ -6,6 +6,7 @@
 
 import concurrent.futures
 import importlib
+import json
 import subprocess
 import sys
 from functools import partial

@@ -14,6 +15,11 @@ from typing import Iterator
 
 from rich.progress import Progress, SpinnerColumn, TextColumn
 
+from llama_stack.distribution.build import (
+    get_provider_dependencies,
+    SERVER_DEPENDENCIES,
+)
+
 REPO_ROOT = Path(__file__).parent.parent.parent

@@ -67,6 +73,39 @@ def check_for_changes() -> bool:
     return result.returncode != 0
 
 
+def collect_template_dependencies(template_dir: Path) -> tuple[str, list[str]]:
+    try:
+        module_name = f"llama_stack.templates.{template_dir.name}"
+        module = importlib.import_module(module_name)
+
+        if template_func := getattr(module, "get_distribution_template", None):
+            template = template_func()
+            normal_deps, special_deps = get_provider_dependencies(template.providers)
+            # Combine all dependencies in order: normal deps, special deps, server deps
+            all_deps = sorted(list(set(normal_deps + SERVER_DEPENDENCIES))) + sorted(
+                list(set(special_deps))
+            )
+
+            return template.name, all_deps
+    except Exception:
+        return None, []
+    return None, []
+
+
+def generate_dependencies_file():
+    templates_dir = REPO_ROOT / "llama_stack" / "templates"
+    distribution_deps = {}
+
+    for template_dir in find_template_dirs(templates_dir):
+        name, deps = collect_template_dependencies(template_dir)
+        if name:
+            distribution_deps[name] = deps
+
+    deps_file = REPO_ROOT / "distributions" / "dependencies.json"
+    with open(deps_file, "w") as f:
+        json.dump(distribution_deps, f, indent=2)
+
+
 def main():
     templates_dir = REPO_ROOT / "llama_stack" / "templates"

@@ -88,6 +127,8 @@ def main():
             list(executor.map(process_func, template_dirs))
             progress.update(task, advance=len(template_dirs))
 
+    generate_dependencies_file()
+
     if check_for_changes():
         print(
             "Distribution template changes detected. Please commit the changes.",

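As a rough illustration of how the generated `distributions/dependencies.json` (shown in full near the top of this diff) might be consumed, the sketch below builds pip install commands for one distribution. The consumption path is an assumption for illustration, not something this commit adds.

```python
import json
from pathlib import Path

# Illustration only: read the checked-in manifest written by
# generate_dependencies_file() above. Assumes it is run from the repo root.
deps = json.loads(Path("distributions/dependencies.json").read_text())

ollama_deps = deps["ollama"]
plain = [d for d in ollama_deps if " --" not in d]    # ordinary requirements
special = [d for d in ollama_deps if " --" in d]      # entries carrying extra pip flags

print("pip install " + " ".join(plain))
for entry in special:
    print("pip install " + entry)
```
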
@@ -57,11 +57,11 @@ models:
   provider_id: null
   provider_model_id: fireworks/llama-v3p1-405b-instruct
 - metadata: {}
-  model_id: meta-llama/Llama-3.2-3B-Instruct
+  model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: null
   provider_model_id: fireworks/llama-v3p2-1b-instruct
 - metadata: {}
-  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
+  model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: null
   provider_model_id: fireworks/llama-v3p2-3b-instruct
 - metadata: {}

@@ -2,7 +2,7 @@ version: '2'
 name: tgi
 distribution_spec:
   description: Use (an external) TGI server for running LLM inference
-  docker_image: llamastack/distribution-tgi:test-0.0.52rc3
+  docker_image: null
   providers:
     inference:
     - remote::tgi

@@ -1,6 +1,6 @@
 version: '2'
 image_name: tgi
-docker_image: llamastack/distribution-tgi:test-0.0.52rc3
+docker_image: null
 conda_env: tgi
 apis:
 - agents

@@ -1,6 +1,6 @@
 version: '2'
 image_name: tgi
-docker_image: llamastack/distribution-tgi:test-0.0.52rc3
+docker_image: null
 conda_env: tgi
 apis:
 - agents

@@ -41,7 +41,7 @@ def get_distribution_template() -> DistributionTemplate:
         name="tgi",
         distro_type="self_hosted",
         description="Use (an external) TGI server for running LLM inference",
-        docker_image="llamastack/distribution-tgi:test-0.0.52rc3",
+        docker_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
         default_models=[inference_model, safety_model],

@@ -2,7 +2,8 @@ blobfile
 fire
 httpx
 huggingface-hub
-llama-models>=0.0.50
+llama-models>=0.0.53
+llama-stack-client>=0.0.53
 prompt-toolkit
 python-dotenv
 pydantic>=2

setup.py (2 changes)

@@ -16,7 +16,7 @@ def read_requirements():
 
 setup(
     name="llama_stack",
-    version="0.0.50",
+    version="0.0.53",
     author="Meta Llama",
     author_email="llama-oss@meta.com",
     description="Llama Stack",