modify doc

Botao Chen 2024-12-17 14:09:32 -08:00
parent 486c0bc9c8
commit 85d0f5f528
7 changed files with 158 additions and 158 deletions

View file

@@ -1,9 +1,9 @@
 {
-  "hf-serverless": [
-    "aiohttp",
+  "bedrock": [
     "aiosqlite",
     "autoevals",
     "blobfile",
+    "boto3",
     "chardet",
     "chromadb-client",
     "datasets",
@@ -11,100 +11,6 @@
     "fastapi",
     "fire",
     "httpx",
-    "huggingface_hub",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pypdf",
-    "redis",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "together": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pypdf",
-    "redis",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "together",
-    "tqdm",
-    "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "vllm-gpu": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pypdf",
-    "redis",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "uvicorn",
-    "vllm",
-    "sentence-transformers --no-deps",
-    "torch --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "remote-vllm": [
-    "aiosqlite",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
     "matplotlib",
     "nltk",
     "numpy",
@@ -157,7 +63,7 @@
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "tgi": [
+  "hf-endpoint": [
     "aiohttp",
     "aiosqlite",
     "autoevals",
@@ -190,11 +96,11 @@
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "bedrock": [
+  "hf-serverless": [
+    "aiohttp",
     "aiosqlite",
     "autoevals",
     "blobfile",
-    "boto3",
     "chardet",
     "chromadb-client",
     "datasets",
@@ -202,6 +108,7 @@
     "fastapi",
     "fire",
     "httpx",
+    "huggingface_hub",
     "matplotlib",
     "nltk",
     "numpy",
@@ -300,34 +207,6 @@
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "cerebras": [
-    "aiosqlite",
-    "blobfile",
-    "cerebras_cloud_sdk",
-    "chardet",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pypdf",
-    "redis",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch --index-url https://download.pytorch.org/whl/cpu"
-  ],
   "ollama": [
     "aiohttp",
     "aiosqlite",
@@ -361,7 +240,7 @@
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
   ],
-  "hf-endpoint": [
+  "tgi": [
     "aiohttp",
     "aiosqlite",
     "autoevals",
@@ -393,5 +272,126 @@
     "uvicorn",
     "sentence-transformers --no-deps",
     "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "together": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "together",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "remote-vllm": [
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "vllm-gpu": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "vllm",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
+  "cerebras": [
+    "aiosqlite",
+    "blobfile",
+    "cerebras_cloud_sdk",
+    "chardet",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
   ]
 }
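Each key in this JSON is a distribution template, and its value is the list of pip packages that template pulls in when it is built. As a rough, illustrative sketch (the template name is just an example key from the list above, and the build/run commands mirror the ones quoted in the distribution docs later in this commit; the exact run.yaml path is assumed from that pattern):

```bash
# Sketch only: build one of the templates listed above and start it.
# "bedrock" is an example key from the JSON; the run.yaml path follows the
# distributions/<template>/run.yaml pattern used in the docs below.
llama stack build --template bedrock --image-type conda
llama stack run distributions/bedrock/run.yaml --port 5001
```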

View file

@@ -31,9 +31,9 @@ Note that you need access to nvidia GPUs to run this distribution. This distribu
 The following environment variables can be configured:
 - `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
-- `INFERENCE_MODEL`: Inference model loaded into the Meta Reference server (default: `meta-llama/Llama-3.2-3B-Instruct`)
+- `INFERENCE_MODEL`: Inference model loaded into the Meta Reference server (default: `Llama3.2-3B-Instruct`)
 - `INFERENCE_CHECKPOINT_DIR`: Directory containing the Meta Reference model checkpoint (default: `null`)
-- `SAFETY_MODEL`: Name of the safety (Llama-Guard) model to use (default: `meta-llama/Llama-Guard-3-1B`)
+- `SAFETY_MODEL`: Name of the safety (Llama-Guard) model to use (default: `Llama-Guard-3-1B`)
 - `SAFETY_CHECKPOINT_DIR`: Directory containing the Llama-Guard model checkpoint (default: `null`)
@@ -63,7 +63,7 @@ docker run \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-gpu \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct
 ```
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -75,8 +75,8 @@ docker run \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-gpu \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct \
+  --env SAFETY_MODEL=Llama-Guard-3-1B
 ```
 ### Via Conda
@@ -87,7 +87,7 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a
 llama stack build --template meta-reference-gpu --image-type conda
 llama stack run distributions/meta-reference-gpu/run.yaml \
   --port 5001 \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct
 ```
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -95,6 +95,6 @@ If you are using Llama Stack Safety / Shield APIs, use:
 ```bash
 llama stack run distributions/meta-reference-gpu/run-with-safety.yaml \
   --port 5001 \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct \
+  --env SAFETY_MODEL=meta-Llama-Guard-3-1B
 ```
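Assembled, the Docker example on this page now reads roughly as below; the `-it` and `-p` lines are assumed boilerplate from the unchanged part of the doc, and GPU passthrough flags may additionally be required for this distribution:

```bash
# Sketch of the full command after the model-name change.
# -it and -p are assumed context from the unchanged portion of the doc;
# GPU flags (e.g. --gpus all) may also be needed and are not shown in the diff.
export LLAMA_STACK_PORT=5001
docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
  llamastack/distribution-meta-reference-gpu \
  --port $LLAMA_STACK_PORT \
  --env INFERENCE_MODEL=Llama3.2-3B-Instruct
```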

View file

@@ -33,7 +33,7 @@ Note that you need access to nvidia GPUs to run this distribution. This distribu
 The following environment variables can be configured:
 - `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
-- `INFERENCE_MODEL`: Inference model loaded into the Meta Reference server (default: `meta-llama/Llama-3.2-3B-Instruct`)
+- `INFERENCE_MODEL`: Inference model loaded into the Meta Reference server (default: `Llama3.2-3B-Instruct`)
 - `INFERENCE_CHECKPOINT_DIR`: Directory containing the Meta Reference model checkpoint (default: `null`)
@@ -63,7 +63,7 @@ docker run \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-quantized-gpu \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct
 ```
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -75,8 +75,8 @@ docker run \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-quantized-gpu \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct \
+  --env SAFETY_MODEL=meta-Llama-Guard-3-1B
 ```
 ### Via Conda
@@ -87,7 +87,7 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a
 llama stack build --template meta-reference-quantized-gpu --image-type conda
 llama stack run distributions/meta-reference-quantized-gpu/run.yaml \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct
 ```
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -95,6 +95,6 @@ If you are using Llama Stack Safety / Shield APIs, use:
 ```bash
 llama stack run distributions/meta-reference-quantized-gpu/run-with-safety.yaml \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct \
+  --env SAFETY_MODEL=Llama-Guard-3-1B
 ```

View file

@@ -53,7 +53,7 @@ docker run \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct
 ```
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -65,8 +65,8 @@ docker run \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct \
+  --env SAFETY_MODEL=Llama-Guard-3-1B
 ```
 ### Via Conda
@@ -77,7 +77,7 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a
 llama stack build --template {{ name }} --image-type conda
 llama stack run distributions/{{ name }}/run.yaml \
   --port 5001 \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct
 ```
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -85,6 +85,6 @@ If you are using Llama Stack Safety / Shield APIs, use:
 ```bash
 llama stack run distributions/{{ name }}/run-with-safety.yaml \
   --port 5001 \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct \
+  --env SAFETY_MODEL=Llama-Guard-3-1B
 ```

View file

@@ -112,7 +112,7 @@ def get_distribution_template() -> DistributionTemplate:
                 "Port for the Llama Stack distribution server",
             ),
             "INFERENCE_MODEL": (
-                "meta-llama/Llama-3.2-3B-Instruct",
+                "Llama3.2-3B-Instruct",
                 "Inference model loaded into the Meta Reference server",
             ),
             "INFERENCE_CHECKPOINT_DIR": (
@@ -120,7 +120,7 @@ def get_distribution_template() -> DistributionTemplate:
                 "Directory containing the Meta Reference model checkpoint",
             ),
             "SAFETY_MODEL": (
-                "meta-llama/Llama-Guard-3-1B",
+                "Llama-Guard-3-1B",
                 "Name of the safety (Llama-Guard) model to use",
             ),
             "SAFETY_CHECKPOINT_DIR": (

View file

@@ -55,7 +55,7 @@ docker run \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct
 ```
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -67,8 +67,8 @@ docker run \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct \
+  --env SAFETY_MODEL=Llama-Guard-3-1B
 ```
 ### Via Conda
@@ -79,7 +79,7 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a
 llama stack build --template {{ name }} --image-type conda
 llama stack run distributions/{{ name }}/run.yaml \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct
 ```
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -87,6 +87,6 @@ If you are using Llama Stack Safety / Shield APIs, use:
 ```bash
 llama stack run distributions/{{ name }}/run-with-safety.yaml \
   --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --env INFERENCE_MODEL=Llama3.2-3B-Instruct \
+  --env SAFETY_MODEL=Llama-Guard-3-1B
 ```

View file

@@ -84,7 +84,7 @@ def get_distribution_template() -> DistributionTemplate:
                 "Port for the Llama Stack distribution server",
             ),
             "INFERENCE_MODEL": (
-                "meta-llama/Llama-3.2-3B-Instruct",
+                "Llama3.2-3B-Instruct",
                 "Inference model loaded into the Meta Reference server",
             ),
             "INFERENCE_CHECKPOINT_DIR": (