forked from phoenix-oss/llama-stack-mirror
fix: Default to port 8321 everywhere (#1734)
As titled, moved all instances of 5001 to 8321
This commit is contained in:
parent
581e8ae562
commit
127bac6869
56 changed files with 2352 additions and 2305 deletions
|
@ -51,14 +51,14 @@ services:
|
|||
- ~/local/llama-stack/:/app/llama-stack-source
|
||||
- ./run${SAFETY_MODEL:+-with-safety}.yaml:/root/my-run.yaml
|
||||
ports:
|
||||
- "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}"
|
||||
- "${LLAMA_STACK_PORT:-8321}:${LLAMA_STACK_PORT:-8321}"
|
||||
environment:
|
||||
- INFERENCE_MODEL=${INFERENCE_MODEL}
|
||||
- SAFETY_MODEL=${SAFETY_MODEL:-}
|
||||
- OLLAMA_URL=http://ollama:11434
|
||||
entrypoint: >
|
||||
python -m llama_stack.distribution.server.server /root/my-run.yaml \
|
||||
--port ${LLAMA_STACK_PORT:-5001}
|
||||
--port ${LLAMA_STACK_PORT:-8321}
|
||||
deploy:
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
|
|
|
@ -84,9 +84,9 @@ services:
|
|||
- SQLITE_STORE_DIR=${SQLITE_STORE_DIR:-$HOME/.llama/distributions/remote-vllm}
|
||||
- SAFETY_MODEL=${SAFETY_MODEL:-meta-llama/Llama-Guard-3-1B}
|
||||
ports:
|
||||
- "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}"
|
||||
- "${LLAMA_STACK_PORT:-8321}:${LLAMA_STACK_PORT:-8321}"
|
||||
# Hack: wait for vLLM server to start before starting docker
|
||||
entrypoint: bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/llamastack-run-remote-vllm.yaml --port 5001"
|
||||
entrypoint: bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/llamastack-run-remote-vllm.yaml --port 8321"
|
||||
deploy:
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
|
|
|
@ -83,7 +83,7 @@ services:
|
|||
- ~/.llama:/root/.llama
|
||||
- ./run${TGI_SAFETY_MODEL:+-with-safety}.yaml:/root/my-run.yaml
|
||||
ports:
|
||||
- "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}"
|
||||
- "${LLAMA_STACK_PORT:-8321}:${LLAMA_STACK_PORT:-8321}"
|
||||
# Hack: wait for TGI server to start before starting docker
|
||||
entrypoint: bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/my-run.yaml"
|
||||
restart_policy:
|
||||
|
|
|
@ -58,7 +58,7 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
@ -75,7 +75,7 @@ docker run \
|
|||
```bash
|
||||
llama stack build --template nvidia --image-type conda
|
||||
llama stack run ./run.yaml \
|
||||
--port 5001 \
|
||||
--port 8321 \
|
||||
--env NVIDIA_API_KEY=$NVIDIA_API_KEY
|
||||
--env INFERENCE_MODEL=$INFERENCE_MODEL
|
||||
```
|
||||
|
|
|
@ -28,7 +28,7 @@ The `llamastack/distribution-bedrock` distribution consists of the following pro
|
|||
|
||||
The following environment variables can be configured:
|
||||
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`)
|
||||
|
||||
### Models
|
||||
|
||||
|
@ -53,7 +53,7 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
|
|
@ -20,7 +20,7 @@ The `llamastack/distribution-cerebras` distribution consists of the following pr
|
|||
|
||||
The following environment variables can be configured:
|
||||
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`)
|
||||
- `CEREBRAS_API_KEY`: Cerebras API Key (default: ``)
|
||||
|
||||
### Models
|
||||
|
@ -45,7 +45,7 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
@ -62,6 +62,6 @@ docker run \
|
|||
```bash
|
||||
llama stack build --template cerebras --image-type conda
|
||||
llama stack run ./run.yaml \
|
||||
--port 5001 \
|
||||
--port 8321 \
|
||||
--env CEREBRAS_API_KEY=$CEREBRAS_API_KEY
|
||||
```
|
||||
|
|
|
@ -30,7 +30,7 @@ The `llamastack/distribution-fireworks` distribution consists of the following p
|
|||
|
||||
The following environment variables can be configured:
|
||||
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`)
|
||||
- `FIREWORKS_API_KEY`: Fireworks.AI API Key (default: ``)
|
||||
|
||||
### Models
|
||||
|
@ -63,7 +63,7 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
|
|
@ -30,7 +30,7 @@ The `llamastack/distribution-groq` distribution consists of the following provid
|
|||
|
||||
The following environment variables can be configured:
|
||||
|
||||
- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
|
||||
- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `8321`)
|
||||
- `GROQ_API_KEY`: Groq API Key (default: ``)
|
||||
|
||||
### Models
|
||||
|
@ -58,7 +58,7 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
|
|
@ -32,7 +32,7 @@ Note that you need access to nvidia GPUs to run this distribution. This distribu
|
|||
|
||||
The following environment variables can be configured:
|
||||
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`)
|
||||
- `INFERENCE_MODEL`: Inference model loaded into the Meta Reference server (default: `meta-llama/Llama-3.2-3B-Instruct`)
|
||||
- `INFERENCE_CHECKPOINT_DIR`: Directory containing the Meta Reference model checkpoint (default: `null`)
|
||||
- `SAFETY_MODEL`: Name of the safety (Llama-Guard) model to use (default: `meta-llama/Llama-Guard-3-1B`)
|
||||
|
@ -77,7 +77,7 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
@ -109,7 +109,7 @@ Make sure you have done `uv pip install llama-stack` and have the Llama Stack CL
|
|||
```bash
|
||||
llama stack build --template meta-reference-gpu --image-type conda
|
||||
llama stack run distributions/meta-reference-gpu/run.yaml \
|
||||
--port 5001 \
|
||||
--port 8321 \
|
||||
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
|
||||
```
|
||||
|
||||
|
@ -117,7 +117,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
|
|||
|
||||
```bash
|
||||
llama stack run distributions/meta-reference-gpu/run-with-safety.yaml \
|
||||
--port 5001 \
|
||||
--port 8321 \
|
||||
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
||||
--env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
|
||||
```
|
||||
|
|
|
@ -34,7 +34,7 @@ Note that you need access to nvidia GPUs to run this distribution. This distribu
|
|||
|
||||
The following environment variables can be configured:
|
||||
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`)
|
||||
- `INFERENCE_MODEL`: Inference model loaded into the Meta Reference server (default: `meta-llama/Llama-3.2-3B-Instruct`)
|
||||
- `INFERENCE_CHECKPOINT_DIR`: Directory containing the Meta Reference model checkpoint (default: `null`)
|
||||
|
||||
|
@ -77,7 +77,7 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
|
|
@ -15,7 +15,7 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov
|
|||
|
||||
The following environment variables can be configured:
|
||||
|
||||
- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
|
||||
- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `8321`)
|
||||
- `NVIDIA_API_KEY`: NVIDIA API Key (default: ``)
|
||||
|
||||
### Models
|
||||
|
@ -39,7 +39,7 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
@ -56,6 +56,6 @@ docker run \
|
|||
```bash
|
||||
llama stack build --template nvidia --image-type conda
|
||||
llama stack run ./run.yaml \
|
||||
--port 5001 \
|
||||
--port 8321 \
|
||||
--env NVIDIA_API_KEY=$NVIDIA_API_KEY
|
||||
```
|
||||
|
|
|
@ -32,7 +32,7 @@ You should use this distribution if you have a regular desktop machine without v
|
|||
|
||||
The following environment variables can be configured:
|
||||
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`)
|
||||
- `OLLAMA_URL`: URL of the Ollama server (default: `http://127.0.0.1:11434`)
|
||||
- `INFERENCE_MODEL`: Inference model loaded into the Ollama server (default: `meta-llama/Llama-3.2-3B-Instruct`)
|
||||
- `SAFETY_MODEL`: Safety model loaded into the Ollama server (default: `meta-llama/Llama-Guard-3-1B`)
|
||||
|
@ -71,7 +71,7 @@ Now you are ready to run Llama Stack with Ollama as the inference provider. You
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
export LLAMA_STACK_PORT=5001
|
||||
export LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
@ -109,7 +109,7 @@ docker run \
|
|||
Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
|
||||
|
||||
```bash
|
||||
export LLAMA_STACK_PORT=5001
|
||||
export LLAMA_STACK_PORT=8321
|
||||
|
||||
llama stack build --template ollama --image-type conda
|
||||
llama stack run ./run.yaml \
|
||||
|
|
|
@ -30,7 +30,7 @@ The `llamastack/distribution-passthrough` distribution consists of the following
|
|||
|
||||
The following environment variables can be configured:
|
||||
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`)
|
||||
- `PASSTHROUGH_API_KEY`: Passthrough API Key (default: ``)
|
||||
- `PASSTHROUGH_URL`: Passthrough URL (default: ``)
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@ You can use this distribution if you have GPUs and want to run an independent vL
|
|||
|
||||
The following environment variables can be configured:
|
||||
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`)
|
||||
- `INFERENCE_MODEL`: Inference model loaded into the vLLM server (default: `meta-llama/Llama-3.2-3B-Instruct`)
|
||||
- `VLLM_URL`: URL of the vLLM server with the main inference model (default: `http://host.docker.internal:5100/v1`)
|
||||
- `MAX_TOKENS`: Maximum number of tokens for generation (default: `4096`)
|
||||
|
@ -96,7 +96,7 @@ This method allows you to get started quickly without having to build the distri
|
|||
```bash
|
||||
export INFERENCE_PORT=8000
|
||||
export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
|
||||
export LLAMA_STACK_PORT=5001
|
||||
export LLAMA_STACK_PORT=8321
|
||||
|
||||
docker run \
|
||||
-it \
|
||||
|
@ -143,7 +143,7 @@ Make sure you have done `uv pip install llama-stack` and have the Llama Stack CL
|
|||
```bash
|
||||
export INFERENCE_PORT=8000
|
||||
export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
|
||||
export LLAMA_STACK_PORT=5001
|
||||
export LLAMA_STACK_PORT=8321
|
||||
|
||||
cd distributions/remote-vllm
|
||||
llama stack build --template remote-vllm --image-type conda
|
||||
|
|
|
@ -27,7 +27,7 @@ The `llamastack/distribution-sambanova` distribution consists of the following p
|
|||
|
||||
The following environment variables can be configured:
|
||||
|
||||
- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
|
||||
- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `8321`)
|
||||
- `SAMBANOVA_API_KEY`: SambaNova.AI API Key (default: ``)
|
||||
|
||||
### Models
|
||||
|
@ -59,7 +59,7 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
|
|
@ -33,7 +33,7 @@ You can use this distribution if you have GPUs and want to run an independent TG
|
|||
|
||||
The following environment variables can be configured:
|
||||
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`)
|
||||
- `INFERENCE_MODEL`: Inference model loaded into the TGI server (default: `meta-llama/Llama-3.2-3B-Instruct`)
|
||||
- `TGI_URL`: URL of the TGI server with the main inference model (default: `http://127.0.0.1:8080/v1`)
|
||||
- `TGI_SAFETY_URL`: URL of the TGI server with the safety model (default: `http://127.0.0.1:8081/v1`)
|
||||
|
@ -92,7 +92,7 @@ Now you are ready to run Llama Stack with TGI as the inference provider. You can
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
|
|
@ -30,7 +30,7 @@ The `llamastack/distribution-together` distribution consists of the following pr
|
|||
|
||||
The following environment variables can be configured:
|
||||
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
|
||||
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`)
|
||||
- `TOGETHER_API_KEY`: Together.AI API Key (default: ``)
|
||||
|
||||
### Models
|
||||
|
@ -64,7 +64,7 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
|
|
@ -48,7 +48,7 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"HOST = \"localhost\" # Replace with your host\n",
|
||||
"PORT = 5001 # Replace with your port\n",
|
||||
"PORT = 8321 # Replace with your port\n",
|
||||
"MODEL_NAME='meta-llama/Llama-3.2-3B-Instruct'"
|
||||
]
|
||||
},
|
||||
|
@ -369,6 +369,9 @@
|
|||
}
|
||||
],
|
||||
"metadata": {
|
||||
"fileHeader": "",
|
||||
"fileUid": "7da25939-a2a3-463c-958e-9cdfd710d158",
|
||||
"isAdHoc": false,
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
|
@ -386,7 +389,5 @@
|
|||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.15"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,7 +43,7 @@
|
|||
"source": [
|
||||
"#### 2. Set Up Local and Cloud Clients\n",
|
||||
"\n",
|
||||
"Initialize both clients, specifying the `base_url` for each instance. In this case, we have the local distribution running on `http://localhost:8321` and the cloud distribution running on `http://localhost:5001`.\n"
|
||||
"Initialize both clients, specifying the `base_url` for each instance. In this case, we have the local distribution running on `http://localhost:8321` and the cloud distribution running on `http://localhost:8322`.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -236,6 +236,9 @@
|
|||
}
|
||||
],
|
||||
"metadata": {
|
||||
"fileHeader": "",
|
||||
"fileUid": "e11939ac-dfbc-4a1c-83be-e494c7f803b8",
|
||||
"isAdHoc": false,
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
|
@ -253,7 +256,5 @@
|
|||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.15"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,7 +47,7 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"HOST = \"localhost\" # Replace with your host\n",
|
||||
"PORT = 5001 # Replace with your port\n",
|
||||
"PORT = 8321 # Replace with your port\n",
|
||||
"MODEL_NAME='meta-llama/Llama-3.2-3B-Instruct'"
|
||||
]
|
||||
},
|
||||
|
@ -281,6 +281,9 @@
|
|||
}
|
||||
],
|
||||
"metadata": {
|
||||
"fileHeader": "",
|
||||
"fileUid": "b1b93b6e-22a2-4c24-8cb0-161fdafff29a",
|
||||
"isAdHoc": false,
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
|
@ -298,7 +301,5 @@
|
|||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
}
|
||||
|
|
|
@ -45,7 +45,7 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"HOST = \"localhost\" # Replace with your host\n",
|
||||
"CLOUD_PORT = 5001 # Replace with your cloud distro port\n",
|
||||
"CLOUD_PORT = 8321 # Replace with your cloud distro port\n",
|
||||
"MODEL_NAME='Llama3.2-11B-Vision-Instruct'"
|
||||
]
|
||||
},
|
||||
|
@ -180,6 +180,9 @@
|
|||
}
|
||||
],
|
||||
"metadata": {
|
||||
"fileHeader": "",
|
||||
"fileUid": "37bbbfda-8e42-446c-89c7-59dd49e2d339",
|
||||
"isAdHoc": false,
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
|
@ -197,7 +200,5 @@
|
|||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,7 +46,7 @@
|
|||
"nest_asyncio.apply()\n",
|
||||
"\n",
|
||||
"HOST = \"localhost\"\n",
|
||||
"PORT = 5001\n",
|
||||
"PORT = 8321\n",
|
||||
"MODEL_NAME = \"meta-llama/Llama-3.2-3B-Instruct\"\n"
|
||||
]
|
||||
},
|
||||
|
@ -335,6 +335,9 @@
|
|||
}
|
||||
],
|
||||
"metadata": {
|
||||
"fileHeader": "",
|
||||
"fileUid": "f0abbf6d-ed52-40ad-afb4-f5ec99130249",
|
||||
"isAdHoc": false,
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
|
@ -352,7 +355,5 @@
|
|||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.15"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
}
|
||||
|
|
|
@ -45,7 +45,7 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"HOST = \"localhost\" # Replace with your host\n",
|
||||
"PORT = 5001 # Replace with your port\n",
|
||||
"PORT = 8321 # Replace with your port\n",
|
||||
"MODEL_NAME='meta-llama/Llama-3.2-3B-Instruct'\n",
|
||||
"MEMORY_BANK_ID=\"tutorial_bank\""
|
||||
]
|
||||
|
@ -378,6 +378,9 @@
|
|||
}
|
||||
],
|
||||
"metadata": {
|
||||
"fileHeader": "",
|
||||
"fileUid": "73bc3357-0e5e-42ff-95b1-40b916d24c4f",
|
||||
"isAdHoc": false,
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
|
@ -395,7 +398,5 @@
|
|||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.15"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
}
|
||||
|
|
|
@ -49,7 +49,7 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"HOST = \"localhost\" # Replace with your host\n",
|
||||
"PORT = 5001 # Replace with your port\n",
|
||||
"PORT = 8321 # Replace with your port\n",
|
||||
"SHEILD_NAME=\"meta-llama/Llama-Guard-3-1B\""
|
||||
]
|
||||
},
|
||||
|
@ -112,6 +112,9 @@
|
|||
}
|
||||
],
|
||||
"metadata": {
|
||||
"fileHeader": "",
|
||||
"fileUid": "9afaddb7-c2fb-4309-8fa0-761697de53f0",
|
||||
"isAdHoc": false,
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
|
@ -129,7 +132,5 @@
|
|||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,7 +50,7 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"HOST = \"localhost\" # Replace with your host\n",
|
||||
"PORT = 5001 # Replace with your port\n",
|
||||
"PORT = 8321 # Replace with your port\n",
|
||||
"MODEL_NAME = \"meta-llama/Llama-3.2-3B-Instruct\"\n"
|
||||
]
|
||||
},
|
||||
|
@ -168,6 +168,9 @@
|
|||
}
|
||||
],
|
||||
"metadata": {
|
||||
"fileHeader": "",
|
||||
"fileUid": "8de24775-c4a0-49c7-904e-608264f69292",
|
||||
"isAdHoc": false,
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
|
@ -185,7 +188,5 @@
|
|||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.15"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
}
|
||||
|
|
|
@ -96,7 +96,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
|
|||
3. **Set the ENV variables by exporting them to the terminal**:
|
||||
```bash
|
||||
export OLLAMA_URL="http://localhost:11434"
|
||||
export LLAMA_STACK_PORT=5001
|
||||
export LLAMA_STACK_PORT=8321
|
||||
export INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct"
|
||||
export SAFETY_MODEL="meta-llama/Llama-Guard-3-1B"
|
||||
```
|
||||
|
@ -112,7 +112,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
|
|||
```
|
||||
Note: Every time you run a new model with `ollama run`, you will need to restart the llama stack. Otherwise it won't see the new model.
|
||||
|
||||
The server will start and listen on `http://localhost:5001`.
|
||||
The server will start and listen on `http://localhost:8321`.
|
||||
|
||||
---
|
||||
## Test with `llama-stack-client` CLI
|
||||
|
@ -120,11 +120,11 @@ After setting up the server, open a new terminal window and configure the llama-
|
|||
|
||||
1. Configure the CLI to point to the llama-stack server.
|
||||
```bash
|
||||
llama-stack-client configure --endpoint http://localhost:5001
|
||||
llama-stack-client configure --endpoint http://localhost:8321
|
||||
```
|
||||
**Expected Output:**
|
||||
```bash
|
||||
Done! You can now use the Llama Stack Client CLI with endpoint http://localhost:5001
|
||||
Done! You can now use the Llama Stack Client CLI with endpoint http://localhost:8321
|
||||
```
|
||||
2. Test the CLI by running inference:
|
||||
```bash
|
||||
|
@ -218,7 +218,7 @@ if INFERENCE_MODEL is None:
|
|||
raise ValueError("The environment variable 'INFERENCE_MODEL' is not set.")
|
||||
|
||||
# Initialize the clien
|
||||
client = LlamaStackClient(base_url="http://localhost:5001")
|
||||
client = LlamaStackClient(base_url="http://localhost:8321")
|
||||
|
||||
# Create a chat completion reques
|
||||
response = client.inference.chat_completion(
|
||||
|
|
|
@ -9,7 +9,11 @@ from pathlib import Path
|
|||
from llama_stack.distribution.datatypes import Provider, ToolGroupInput
|
||||
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
|
||||
from llama_stack.providers.remote.inference.bedrock.models import MODEL_ENTRIES
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
|
||||
from llama_stack.templates.template import (
|
||||
DistributionTemplate,
|
||||
RunConfigSettings,
|
||||
get_model_registry,
|
||||
)
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
|
@ -76,7 +80,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"5001",
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
},
|
||||
|
|
|
@ -47,7 +47,7 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
|
|
@ -14,7 +14,11 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
|
|||
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
|
||||
from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
|
||||
from llama_stack.providers.remote.inference.cerebras.models import MODEL_ENTRIES
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
|
||||
from llama_stack.templates.template import (
|
||||
DistributionTemplate,
|
||||
RunConfigSettings,
|
||||
get_model_registry,
|
||||
)
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
|
@ -100,7 +104,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"5001",
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"CEREBRAS_API_KEY": (
|
||||
|
|
|
@ -39,7 +39,7 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
@ -56,6 +56,6 @@ docker run \
|
|||
```bash
|
||||
llama stack build --template cerebras --image-type conda
|
||||
llama stack run ./run.yaml \
|
||||
--port 5001 \
|
||||
--port 8321 \
|
||||
--env CEREBRAS_API_KEY=$CEREBRAS_API_KEY
|
||||
```
|
||||
|
|
|
@ -15,10 +15,16 @@ from llama_stack.distribution.datatypes import (
|
|||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig
|
||||
from llama_stack.providers.inline.vector_io.sqlite_vec.config import (
|
||||
SQLiteVectorIOConfig,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
|
||||
from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
|
||||
from llama_stack.templates.template import (
|
||||
DistributionTemplate,
|
||||
RunConfigSettings,
|
||||
get_model_registry,
|
||||
)
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
|
@ -104,7 +110,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"5001",
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"FIREWORKS_API_KEY": (
|
||||
|
|
|
@ -16,20 +16,38 @@ from llama_stack.distribution.datatypes import (
|
|||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig
|
||||
from llama_stack.providers.inline.vector_io.sqlite_vec.config import (
|
||||
SQLiteVectorIOConfig,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.anthropic.config import AnthropicConfig
|
||||
from llama_stack.providers.remote.inference.anthropic.models import MODEL_ENTRIES as ANTHROPIC_MODEL_ENTRIES
|
||||
from llama_stack.providers.remote.inference.anthropic.models import (
|
||||
MODEL_ENTRIES as ANTHROPIC_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
|
||||
from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES as FIREWORKS_MODEL_ENTRIES
|
||||
from llama_stack.providers.remote.inference.fireworks.models import (
|
||||
MODEL_ENTRIES as FIREWORKS_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.gemini.config import GeminiConfig
|
||||
from llama_stack.providers.remote.inference.gemini.models import MODEL_ENTRIES as GEMINI_MODEL_ENTRIES
|
||||
from llama_stack.providers.remote.inference.gemini.models import (
|
||||
MODEL_ENTRIES as GEMINI_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.groq.config import GroqConfig
|
||||
from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES as GROQ_MODEL_ENTRIES
|
||||
from llama_stack.providers.remote.inference.groq.models import (
|
||||
MODEL_ENTRIES as GROQ_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
|
||||
from llama_stack.providers.remote.inference.openai.models import MODEL_ENTRIES as OPENAI_MODEL_ENTRIES
|
||||
from llama_stack.providers.remote.inference.openai.models import (
|
||||
MODEL_ENTRIES as OPENAI_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
|
||||
from llama_stack.providers.remote.vector_io.pgvector.config import PGVectorVectorIOConfig
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
|
||||
from llama_stack.providers.remote.vector_io.pgvector.config import (
|
||||
PGVectorVectorIOConfig,
|
||||
)
|
||||
from llama_stack.templates.template import (
|
||||
DistributionTemplate,
|
||||
RunConfigSettings,
|
||||
get_model_registry,
|
||||
)
|
||||
|
||||
|
||||
def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
|
||||
|
@ -168,7 +186,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"5001",
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"FIREWORKS_API_KEY": (
|
||||
|
|
|
@ -49,7 +49,7 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
|
|
@ -19,7 +19,11 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
|
|||
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
|
||||
from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
|
||||
from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
|
||||
from llama_stack.templates.template import (
|
||||
DistributionTemplate,
|
||||
RunConfigSettings,
|
||||
get_model_registry,
|
||||
)
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
|
@ -158,7 +162,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"5001",
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"FIREWORKS_API_KEY": (
|
||||
|
|
|
@ -49,7 +49,7 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
|
|
@ -7,17 +7,17 @@
|
|||
from pathlib import Path
|
||||
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
from llama_stack.distribution.datatypes import (
|
||||
ModelInput,
|
||||
Provider,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
|
||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||
SentenceTransformersInferenceConfig,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.groq import GroqConfig
|
||||
from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
|
||||
from llama_stack.templates.template import (
|
||||
DistributionTemplate,
|
||||
RunConfigSettings,
|
||||
get_model_registry,
|
||||
)
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
|
@ -97,7 +97,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
run_config_env_vars={
|
||||
"LLAMASTACK_PORT": (
|
||||
"5001",
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"GROQ_API_KEY": (
|
||||
|
|
|
@ -127,7 +127,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"5001",
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"HF_API_TOKEN": (
|
||||
|
|
|
@ -128,7 +128,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"5001",
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"HF_API_TOKEN": (
|
||||
|
|
|
@ -65,7 +65,7 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
@ -97,7 +97,7 @@ Make sure you have done `uv pip install llama-stack` and have the Llama Stack CL
|
|||
```bash
|
||||
llama stack build --template {{ name }} --image-type conda
|
||||
llama stack run distributions/{{ name }}/run.yaml \
|
||||
--port 5001 \
|
||||
--port 8321 \
|
||||
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
|
||||
```
|
||||
|
||||
|
@ -105,7 +105,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
|
|||
|
||||
```bash
|
||||
llama stack run distributions/{{ name }}/run-with-safety.yaml \
|
||||
--port 5001 \
|
||||
--port 8321 \
|
||||
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
||||
--env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
|
||||
```
|
||||
|
|
|
@ -134,7 +134,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"5001",
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"INFERENCE_MODEL": (
|
||||
|
|
|
@ -67,7 +67,7 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
|
|
@ -100,7 +100,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"5001",
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"INFERENCE_MODEL": (
|
||||
|
|
|
@ -39,7 +39,7 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
@ -56,7 +56,7 @@ docker run \
|
|||
```bash
|
||||
llama stack build --template nvidia --image-type conda
|
||||
llama stack run ./run.yaml \
|
||||
--port 5001 \
|
||||
--port 8321 \
|
||||
--env NVIDIA_API_KEY=$NVIDIA_API_KEY
|
||||
--env INFERENCE_MODEL=$INFERENCE_MODEL
|
||||
```
|
||||
|
|
|
@ -60,7 +60,7 @@ Now you are ready to run Llama Stack with Ollama as the inference provider. You
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
export LLAMA_STACK_PORT=5001
|
||||
export LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
@ -98,7 +98,7 @@ docker run \
|
|||
Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
|
||||
|
||||
```bash
|
||||
export LLAMA_STACK_PORT=5001
|
||||
export LLAMA_STACK_PORT=8321
|
||||
|
||||
llama stack build --template {{ name }} --image-type conda
|
||||
llama stack run ./run.yaml \
|
||||
|
|
|
@ -138,7 +138,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"5001",
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"OLLAMA_URL": (
|
||||
|
|
|
@ -279,7 +279,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"5001",
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"TOGETHER_API_KEY": (
|
||||
|
|
|
@ -21,10 +21,7 @@ from llama_stack.providers.remote.inference.passthrough.config import (
|
|||
PassthroughImplConfig,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
|
||||
from llama_stack.templates.template import (
|
||||
DistributionTemplate,
|
||||
RunConfigSettings,
|
||||
)
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
|
@ -186,7 +183,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"5001",
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"PASSTHROUGH_API_KEY": (
|
||||
|
|
|
@ -83,7 +83,7 @@ This method allows you to get started quickly without having to build the distri
|
|||
```bash
|
||||
export INFERENCE_PORT=8000
|
||||
export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
|
||||
export LLAMA_STACK_PORT=5001
|
||||
export LLAMA_STACK_PORT=8321
|
||||
|
||||
docker run \
|
||||
-it \
|
||||
|
@ -130,7 +130,7 @@ Make sure you have done `uv pip install llama-stack` and have the Llama Stack CL
|
|||
```bash
|
||||
export INFERENCE_PORT=8000
|
||||
export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
|
||||
export LLAMA_STACK_PORT=5001
|
||||
export LLAMA_STACK_PORT=8321
|
||||
|
||||
cd distributions/remote-vllm
|
||||
llama stack build --template remote-vllm --image-type conda
|
||||
|
|
|
@ -135,7 +135,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"5001",
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"INFERENCE_MODEL": (
|
||||
|
|
|
@ -49,7 +49,7 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
|
|
@ -6,17 +6,19 @@
|
|||
|
||||
from pathlib import Path
|
||||
|
||||
from llama_stack.distribution.datatypes import (
|
||||
Provider,
|
||||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.distribution.datatypes import Provider, ShieldInput, ToolGroupInput
|
||||
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
|
||||
from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
|
||||
from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES
|
||||
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
|
||||
from llama_stack.providers.remote.vector_io.pgvector.config import PGVectorVectorIOConfig
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
|
||||
from llama_stack.providers.remote.vector_io.pgvector.config import (
|
||||
PGVectorVectorIOConfig,
|
||||
)
|
||||
from llama_stack.templates.template import (
|
||||
DistributionTemplate,
|
||||
RunConfigSettings,
|
||||
get_model_registry,
|
||||
)
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
|
@ -105,7 +107,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
run_config_env_vars={
|
||||
"LLAMASTACK_PORT": (
|
||||
"5001",
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"SAMBANOVA_API_KEY": (
|
||||
|
|
|
@ -80,7 +80,7 @@ Now you are ready to run Llama Stack with TGI as the inference provider. You can
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
|
|
@ -129,7 +129,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"5001",
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"INFERENCE_MODEL": (
|
||||
|
|
|
@ -49,7 +49,7 @@ You can do this via Conda (build code) or Docker which has a pre-built image.
|
|||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=5001
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
|
|
|
@ -19,7 +19,11 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
|
|||
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
|
||||
from llama_stack.providers.remote.inference.together import TogetherImplConfig
|
||||
from llama_stack.providers.remote.inference.together.models import MODEL_ENTRIES
|
||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
|
||||
from llama_stack.templates.template import (
|
||||
DistributionTemplate,
|
||||
RunConfigSettings,
|
||||
get_model_registry,
|
||||
)
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
|
@ -154,7 +158,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"5001",
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"TOGETHER_API_KEY": (
|
||||
|
|
|
@ -100,7 +100,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
},
|
||||
run_config_env_vars={
|
||||
"LLAMA_STACK_PORT": (
|
||||
"5001",
|
||||
"8321",
|
||||
"Port for the Llama Stack distribution server",
|
||||
),
|
||||
"INFERENCE_MODEL": (
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue