Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-03 01:03:59 +00:00)

Commit 85d0f5f528 (parent 486c0bc9c8): modify doc

7 changed files with 158 additions and 158 deletions
@@ -1,9 +1,9 @@
 {
-  "hf-serverless": [
-    "aiohttp",
+  "bedrock": [
     "aiosqlite",
     "autoevals",
     "blobfile",
+    "boto3",
     "chardet",
     "chromadb-client",
     "datasets",
@@ -11,100 +11,6 @@
     "fastapi",
     "fire",
     "httpx",
-    "huggingface_hub",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pypdf",
-    "redis",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "together": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pypdf",
-    "redis",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "together",
-    "tqdm",
-    "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "vllm-gpu": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pypdf",
-    "redis",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "uvicorn",
-    "vllm",
-    "sentence-transformers --no-deps",
-    "torch --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "remote-vllm": [
-    "aiosqlite",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
     "matplotlib",
     "nltk",
     "numpy",
@@ -157,7 +63,7 @@
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "tgi": [
+  "hf-endpoint": [
     "aiohttp",
     "aiosqlite",
     "autoevals",
@@ -190,11 +96,11 @@
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "bedrock": [
+  "hf-serverless": [
+    "aiohttp",
     "aiosqlite",
     "autoevals",
     "blobfile",
-    "boto3",
     "chardet",
     "chromadb-client",
     "datasets",
@@ -202,6 +108,7 @@
     "fastapi",
     "fire",
     "httpx",
+    "huggingface_hub",
     "matplotlib",
     "nltk",
     "numpy",
@@ -300,34 +207,6 @@
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "cerebras": [
-    "aiosqlite",
-    "blobfile",
-    "cerebras_cloud_sdk",
-    "chardet",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pypdf",
-    "redis",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch --index-url https://download.pytorch.org/whl/cpu"
-  ],
   "ollama": [
     "aiohttp",
     "aiosqlite",
@@ -361,7 +240,7 @@
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "hf-endpoint": [
+  "tgi": [
     "aiohttp",
     "aiosqlite",
     "autoevals",
@@ -393,5 +272,126 @@
     "uvicorn",
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "together": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "together",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "remote-vllm": [
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "vllm-gpu": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "vllm",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "cerebras": [
+    "aiosqlite",
+    "blobfile",
+    "cerebras_cloud_sdk",
+    "chardet",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
   ]
 }
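The file above (its name is not shown in this view) maps each distribution template to the pip packages its build needs; this commit only reorders the provider sections and adjusts the `bedrock`/`hf-serverless` entries. Some entries bundle extra pip flags (`--no-deps`, `--index-url`) with the package name. A minimal sketch of how such a registry could be consumed — the path and helper name here are assumptions for illustration, not part of this commit:

```python
import json
import shlex


def pip_install_args(registry_path: str, template: str) -> list[str]:
    """Build a `pip install` argument list for one distribution template.

    Assumes a JSON file shaped like the registry in this diff:
    {"bedrock": ["aiosqlite", ..., "torch --index-url ..."], ...}
    """
    with open(registry_path) as f:
        registry = json.load(f)
    args = ["pip", "install"]
    for entry in registry[template]:
        # Entries such as "sentence-transformers --no-deps" carry pip flags
        # alongside the package name, so split them into separate arguments.
        args.extend(shlex.split(entry))
    return args


# Example usage (the file path is hypothetical):
# print(" ".join(pip_install_args("dependencies.json", "bedrock")))
```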
@@ -31,9 +31,9 @@ Note that you need access to nvidia GPUs to run this distribution. This distribu
 The following environment variables can be configured:
 
 - `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
-- `INFERENCE_MODEL`: Inference model loaded into the Meta Reference server (default: `meta-llama/Llama-3.2-3B-Instruct`)
+- `INFERENCE_MODEL`: Inference model loaded into the Meta Reference server (default: `Llama3.2-3B-Instruct`)
 - `INFERENCE_CHECKPOINT_DIR`: Directory containing the Meta Reference model checkpoint (default: `null`)
-- `SAFETY_MODEL`: Name of the safety (Llama-Guard) model to use (default: `meta-llama/Llama-Guard-3-1B`)
+- `SAFETY_MODEL`: Name of the safety (Llama-Guard) model to use (default: `Llama-Guard-3-1B`)
 - `SAFETY_CHECKPOINT_DIR`: Directory containing the Llama-Guard model checkpoint (default: `null`)
 
 
@@ -63,7 +63,7 @@ docker run \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-gpu \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -75,8 +75,8 @@ docker run \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-gpu \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct \
+  --env SAFETY_MODEL=Llama-Guard-3-1B
 ```
 
 ### Via Conda
@@ -87,7 +87,7 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a
 llama stack build --template meta-reference-gpu --image-type conda
 llama stack run distributions/meta-reference-gpu/run.yaml \
   --port 5001 \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -95,6 +95,6 @@ If you are using Llama Stack Safety / Shield APIs, use:
 ```bash
 llama stack run distributions/meta-reference-gpu/run-with-safety.yaml \
   --port 5001 \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct \
+  --env SAFETY_MODEL=meta-Llama-Guard-3-1B
 ```
@@ -33,7 +33,7 @@ Note that you need access to nvidia GPUs to run this distribution. This distribu
 The following environment variables can be configured:
 
 - `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
-- `INFERENCE_MODEL`: Inference model loaded into the Meta Reference server (default: `meta-llama/Llama-3.2-3B-Instruct`)
+- `INFERENCE_MODEL`: Inference model loaded into the Meta Reference server (default: `Llama3.2-3B-Instruct`)
 - `INFERENCE_CHECKPOINT_DIR`: Directory containing the Meta Reference model checkpoint (default: `null`)
 
 
@@ -63,7 +63,7 @@ docker run \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-quantized-gpu \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -75,8 +75,8 @@ docker run \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-quantized-gpu \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct \
+  --env SAFETY_MODEL=meta-Llama-Guard-3-1B
 ```
 
 ### Via Conda
@@ -87,7 +87,7 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a
 llama stack build --template meta-reference-quantized-gpu --image-type conda
 llama stack run distributions/meta-reference-quantized-gpu/run.yaml \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -95,6 +95,6 @@ If you are using Llama Stack Safety / Shield APIs, use:
 ```bash
 llama stack run distributions/meta-reference-quantized-gpu/run-with-safety.yaml \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct \
+  --env SAFETY_MODEL=Llama-Guard-3-1B
 ```
@@ -53,7 +53,7 @@ docker run \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -65,8 +65,8 @@ docker run \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct \
+  --env SAFETY_MODEL=Llama-Guard-3-1B
 ```
 
 ### Via Conda
@@ -77,7 +77,7 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a
 llama stack build --template {{ name }} --image-type conda
 llama stack run distributions/{{ name }}/run.yaml \
   --port 5001 \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -85,6 +85,6 @@ If you are using Llama Stack Safety / Shield APIs, use:
 ```bash
 llama stack run distributions/{{ name }}/run-with-safety.yaml \
   --port 5001 \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct \
+  --env SAFETY_MODEL=Llama-Guard-3-1B
 ```
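The two files above are doc templates that use `{{ name }}` placeholders, so a single markdown source yields the per-distribution pages shown earlier in this commit. A rough sketch of that substitution using jinja2 — the snippet and variable names are illustrative assumptions, not the project's actual rendering code:

```python
from jinja2 import Template  # third-party: pip install jinja2

# A fragment mirroring the conda instructions from the template above.
doc_template = Template(
    "llama stack build --template {{ name }} --image-type conda\n"
    "llama stack run distributions/{{ name }}/run.yaml \\\n"
    "  --port 5001 \\\n"
    "  --env INFERENCE_MODEL=Llama3.2-3B-Instruct\n"
)

# Rendering with a concrete distribution name reproduces the commands
# seen in the generated docs earlier in this diff.
print(doc_template.render(name="meta-reference-gpu"))
```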
@@ -112,7 +112,7 @@ def get_distribution_template() -> DistributionTemplate:
             "Port for the Llama Stack distribution server",
         ),
         "INFERENCE_MODEL": (
-            "meta-llama/Llama-3.2-3B-Instruct",
+            "Llama3.2-3B-Instruct",
             "Inference model loaded into the Meta Reference server",
         ),
         "INFERENCE_CHECKPOINT_DIR": (
@@ -120,7 +120,7 @@ def get_distribution_template() -> DistributionTemplate:
             "Directory containing the Meta Reference model checkpoint",
         ),
         "SAFETY_MODEL": (
-            "meta-llama/Llama-Guard-3-1B",
+            "Llama-Guard-3-1B",
             "Name of the safety (Llama-Guard) model to use",
         ),
         "SAFETY_CHECKPOINT_DIR": (
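In the Python template above, each configurable environment variable is declared as a `name: (default, description)` pair, and this commit changes only the default strings. A small sketch of how such a mapping can drive both the generated docs and runtime defaults — the dict and helper names are assumed for illustration:

```python
import os

# Shape mirrored from the diff above: env var name -> (default value, description).
run_config_env_vars: dict[str, tuple[str, str]] = {
    "INFERENCE_MODEL": (
        "Llama3.2-3B-Instruct",
        "Inference model loaded into the Meta Reference server",
    ),
    "SAFETY_MODEL": (
        "Llama-Guard-3-1B",
        "Name of the safety (Llama-Guard) model to use",
    ),
}


def resolve(name: str) -> str:
    """Return the value from the environment, falling back to the declared default."""
    default, _description = run_config_env_vars[name]
    return os.environ.get(name, default)


print(resolve("INFERENCE_MODEL"))
```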
@@ -55,7 +55,7 @@ docker run \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -67,8 +67,8 @@ docker run \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct \
+  --env SAFETY_MODEL=Llama-Guard-3-1B
 ```
 
 ### Via Conda
@@ -79,7 +79,7 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a
 llama stack build --template {{ name }} --image-type conda
 llama stack run distributions/{{ name }}/run.yaml \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -87,6 +87,6 @@ If you are using Llama Stack Safety / Shield APIs, use:
 ```bash
 llama stack run distributions/{{ name }}/run-with-safety.yaml \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct \
+  --env SAFETY_MODEL=Llama-Guard-3-1B
 ```
@@ -84,7 +84,7 @@ def get_distribution_template() -> DistributionTemplate:
             "Port for the Llama Stack distribution server",
         ),
         "INFERENCE_MODEL": (
-            "meta-llama/Llama-3.2-3B-Instruct",
+            "Llama3.2-3B-Instruct",
             "Inference model loaded into the Meta Reference server",
         ),
         "INFERENCE_CHECKPOINT_DIR": (