Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-06 18:40:57 +00:00)

Merge e0dda3bb06 into sapling-pr-archive-ehhuang
This commit is contained in commit 043b9d93cd.
27 changed files with 7930 additions and 7743 deletions
@@ -92,7 +92,7 @@ As more providers start supporting Llama 4, you can use them in Llama Stack as w
 To try Llama Stack locally, run:

 ```bash
-curl -LsSf https://github.com/meta-llama/llama-stack/raw/main/scripts/install.sh | bash
+curl -LsSf https://github.com/llamastack/llama-stack/raw/main/scripts/install.sh | bash
 ```

 ### Overview
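If you would rather not pipe a remote script straight into bash, a minimal alternative (not part of this change, assuming only standard curl and bash) is to download the installer, review it, and then run it:

```bash
# Fetch the installer to a local file instead of piping it to bash
curl -LsSf -o install.sh https://github.com/llamastack/llama-stack/raw/main/scripts/install.sh

# Inspect what it will do, then execute it
less install.sh
bash install.sh
```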
@@ -51,8 +51,9 @@ device: cpu
 You can access the HuggingFace trainer via the `starter` distribution:

 ```bash
-llama stack build --distro starter --image-type venv
-llama stack run ~/.llama/distributions/starter/starter-run.yaml
+uv pip install llama-stack
+llama stack list-deps starter | xargs -L1 uv pip install
+llama stack run starter
 ```

 ### Usage Example
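The new install flow shown above can be kept in an isolated virtual environment. A sketch only, assuming `uv` is already installed (the directory and file names here are arbitrary):

```bash
# Create and activate an isolated environment
uv venv .venv
source .venv/bin/activate

# Install the CLI, then install the distribution's dependencies
uv pip install llama-stack
llama stack list-deps starter > starter-deps.txt   # optional: review the list first
xargs -L1 uv pip install < starter-deps.txt

# Launch the starter distribution
llama stack run starter
```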
@@ -175,8 +175,8 @@ llama-stack-client benchmarks register \
 **1. Start the Llama Stack API Server**

 ```bash
-# Build and run a distribution (example: together)
-llama stack build --distro together --image-type venv
+uv pip install llama-stack
+llama stack list-deps together | xargs -L1 uv pip install
 llama stack run together
 ```

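Once the server is up, a quick smoke test (a sketch, assuming the default port 8321 and that the health route is exposed at `/v1/health`) is:

```bash
# Should return a small JSON payload once the server is ready
curl -sf http://localhost:8321/v1/health && echo "server is up"
```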
@@ -209,7 +209,8 @@ The playground works with any Llama Stack distribution. Popular options include:
 <TabItem value="together" label="Together AI">

 ```bash
-llama stack build --distro together --image-type venv
+uv pip install llama-stack
+llama stack list-deps together | xargs -L1 uv pip install
 llama stack run together
 ```

@@ -222,7 +223,8 @@ llama stack run together
 <TabItem value="ollama" label="Ollama (Local)">

 ```bash
-llama stack build --distro ollama --image-type venv
+uv pip install llama-stack
+llama stack list-deps ollama | xargs -L1 uv pip install
 llama stack run ollama
 ```

@@ -235,7 +237,8 @@ llama stack run ollama
 <TabItem value="meta-reference" label="Meta Reference">

 ```bash
-llama stack build --distro meta-reference --image-type venv
+uv pip install llama-stack
+llama stack list-deps meta-reference | xargs -L1 uv pip install
 llama stack run meta-reference
 ```

@@ -20,7 +20,9 @@ RAG enables your applications to reference and recall information from external
 In one terminal, start the Llama Stack server:

 ```bash
-uv run llama stack build --distro starter --image-type venv --run
+uv pip install llama-stack
+llama stack list-deps starter | xargs -L1 uv pip install
+llama stack run starter
 ```

 ### 2. Connect with OpenAI Client
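Before wiring up a client, you can confirm the server sees its models. A sketch, assuming the default port 8321 and that the model listing is served at `/v1/models`:

```bash
# List the models registered with the running stack
curl -s http://localhost:8321/v1/models | python3 -m json.tool
```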
@@ -67,7 +67,7 @@ def get_base_url(self) -> str:

 ## Testing the Provider

-Before running tests, you must have required dependencies installed. This depends on the providers or distributions you are testing. For example, if you are testing the `together` distribution, you should install dependencies via `llama stack build --distro together`.
+Before running tests, you must have the required dependencies installed. This depends on the providers or distributions you are testing. For example, if you are testing the `together` distribution, install its dependencies with `llama stack list-deps together | xargs -L1 uv pip install`.

 ### 1. Integration Testing

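With the dependencies in place, a typical next step is to export the provider's API key and point pytest at the integration suite. This is a sketch only: the environment variable name and test path are assumptions, not part of this change:

```bash
# Hypothetical key name and test path; adjust for the provider under test
export TOGETHER_API_KEY=<your-key>
uv run pytest -v tests/integration/inference
```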
@@ -12,7 +12,7 @@ This avoids the overhead of setting up a server.
 ```bash
 # setup
 uv pip install llama-stack
-llama stack build --distro starter --image-type venv
+llama stack list-deps starter | xargs -L1 uv pip install
 ```

 ```python
@@ -59,7 +59,7 @@ Start a Llama Stack server on localhost. Here is an example of how you can do th
 uv venv starter --python 3.12
 source starter/bin/activate # On Windows: starter\Scripts\activate
 pip install --no-cache llama-stack==0.2.2
-llama stack build --distro starter --image-type venv
+llama stack list-deps starter | xargs -L1 uv pip install
 export FIREWORKS_API_KEY=<SOME_KEY>
 llama stack run starter --port 5050
 ```
@@ -166,10 +166,11 @@ docker run \

 ### Via venv

-Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
+Install the package and distribution dependencies before launching:

 ```bash
-llama stack build --distro dell --image-type venv
+uv pip install llama-stack
+llama stack list-deps dell | xargs -L1 uv pip install
 INFERENCE_MODEL=$INFERENCE_MODEL \
 DEH_URL=$DEH_URL \
 CHROMA_URL=$CHROMA_URL \
@@ -81,10 +81,11 @@ docker run \

 ### Via venv

-Make sure you have done `uv pip install llama-stack` and have the Llama Stack CLI available.
+Install the package and this distribution’s dependencies into your active virtualenv:

 ```bash
-llama stack build --distro meta-reference-gpu --image-type venv
+uv pip install llama-stack
+llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
 INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
 llama stack run distributions/meta-reference-gpu/run.yaml \
   --port 8321
@@ -136,11 +136,12 @@ docker run \

 ### Via venv

-If you've set up your local development environment, you can also build the image using your local virtual environment.
+If you've set up your local development environment, you can install this distribution into your virtualenv:

 ```bash
+uv pip install llama-stack
+llama stack list-deps nvidia | xargs -L1 uv pip install
 INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
-llama stack build --distro nvidia --image-type venv
 NVIDIA_API_KEY=$NVIDIA_API_KEY \
 INFERENCE_MODEL=$INFERENCE_MODEL \
 llama stack run ./run.yaml \
@@ -240,6 +240,6 @@ additional_pip_packages:
 - sqlalchemy[asyncio]
 ```

-No other steps are required other than `llama stack build` and `llama stack run`. The build process will use `module` to install all of the provider dependencies, retrieve the spec, etc.
+No other steps are required beyond installing dependencies with `llama stack list-deps <distro> | xargs -L1 uv pip install` and then running `llama stack run`. The CLI will use `module` to install the provider dependencies, retrieve the spec, etc.

 The provider will now be available in Llama Stack with the type `remote::ramalama`.
@@ -123,7 +123,9 @@
 " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server with the together inference provider\n",
-"!uv run --with llama-stack llama stack build --distro together\n",
+"!uv pip install llama-stack\n",
+"llama stack list-deps together | xargs -L1 uv pip install\n",
+"llama stack run together\n",
 "\n",
 "def run_llama_stack_server_background():\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",
@@ -233,7 +233,9 @@
 " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server\n",
-"!uv run --with llama-stack llama stack build --distro meta-reference-gpu\n",
+"!uv pip install llama-stack\n",
+"llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install\n",
+"llama stack run meta-reference-gpu\n",
 "\n",
 "def run_llama_stack_server_background():\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",
@@ -223,7 +223,9 @@
 " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server\n",
-"!uv run --with llama-stack llama stack build --distro llama_api\n",
+"!uv pip install llama-stack\n",
+"llama stack list-deps llama_api | xargs -L1 uv pip install\n",
+"llama stack run llama_api\n",
 "\n",
 "def run_llama_stack_server_background():\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",
@@ -2864,7 +2864,8 @@
 }
 ],
 "source": [
-"!llama stack build --distro experimental-post-training --image-type venv --image-name __system__"
+"!uv pip install llama-stack\n",
+"llama stack list-deps experimental-post-training | xargs -L1 uv pip install --image-name __system__\n"
 ]
 },
 {
@@ -38,7 +38,8 @@
 "source": [
 "# NBVAL_SKIP\n",
 "!pip install -U llama-stack\n",
-"!UV_SYSTEM_PYTHON=1 llama stack build --distro fireworks --image-type venv"
+"!UV_SYSTEM_PYTHON=1 uv pip install llama-stack\n",
+"llama stack list-deps fireworks | xargs -L1 uv pip install\n"
 ]
 },
 {
@@ -57,7 +57,8 @@
 "outputs": [],
 "source": [
 "# NBVAL_SKIP\n",
-"!UV_SYSTEM_PYTHON=1 llama stack build --distro together --image-type venv"
+"!UV_SYSTEM_PYTHON=1 uv pip install llama-stack\n",
+"llama stack list-deps together | xargs -L1 uv pip install\n"
 ]
 },
 {
@@ -136,7 +136,9 @@
 " \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",
 " process = subprocess.Popen(\n",
-" \"uv run --with llama-stack llama stack build --distro starter --image-type venv --run\",\n",
+" \"uv pip install llama-stack\n",
+"llama stack list-deps starter | xargs -L1 uv pip install\n",
+"llama stack run starter --image-type venv --run\",\n",
 " shell=True,\n",
 " stdout=log_file,\n",
 " stderr=log_file,\n",
@@ -172,7 +174,7 @@
 "\n",
 "def kill_llama_stack_server():\n",
 " # Kill any existing llama stack server processes using pkill command\n",
-" os.system(\"pkill -f llama_stack.core.server.server\")"
+" os.system(\"pkill -f llama_stack.core.server.server\")\n"
 ]
 },
 {
@@ -105,7 +105,9 @@
 " \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",
 " process = subprocess.Popen(\n",
-" \"uv run --with llama-stack llama stack build --distro starter --image-type venv --run\",\n",
+" \"uv pip install llama-stack\n",
+"llama stack list-deps starter | xargs -L1 uv pip install\n",
+"llama stack run starter --image-type venv --run\",\n",
 " shell=True,\n",
 " stdout=log_file,\n",
 " stderr=log_file,\n",
@@ -141,7 +143,7 @@
 "\n",
 "def kill_llama_stack_server():\n",
 " # Kill any existing llama stack server processes using pkill command\n",
-" os.system(\"pkill -f llama_stack.core.server.server\")"
+" os.system(\"pkill -f llama_stack.core.server.server\")\n"
 ]
 },
 {
@@ -91,9 +91,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"```bash\n",
-"LLAMA_STACK_DIR=$(pwd) llama stack build --distro nvidia --image-type venv\n",
-"```"
+"```bash\nuv pip install llama-stack\nllama stack list-deps nvidia | xargs -L1 uv pip install\n```\n"
 ]
 },
 {
@@ -80,9 +80,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"```bash\n",
-"LLAMA_STACK_DIR=$(pwd) llama stack build --distro nvidia --image-type venv\n",
-"```"
+"```bash\nuv pip install llama-stack\nllama stack list-deps nvidia | xargs -L1 uv pip install\n```\n"
 ]
 },
 {
@@ -145,7 +145,9 @@
 " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
 "\n",
 "# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
-"!uv run --with llama-stack llama stack build --distro starter\n",
+"!uv pip install llama-stack\n",
+"llama stack list-deps starter | xargs -L1 uv pip install\n",
+"llama stack run starter\n",
 "\n",
 "def run_llama_stack_server_background():\n",
 " log_file = open(\"llama_stack_server.log\", \"w\")\n",
@@ -47,11 +47,12 @@ function QuickStart() {
 <pre><code>{`# Install uv and start Ollama
 ollama run llama3.2:3b --keepalive 60m

+# Install server dependencies
+uv pip install llama-stack
+llama stack list-deps starter | xargs -L1 uv pip install
+
 # Run Llama Stack server
-OLLAMA_URL=http://localhost:11434 \\
-uv run --with llama-stack \\
-llama stack build --distro starter \\
---image-type venv --run
+OLLAMA_URL=http://localhost:11434 llama stack run starter

 # Try the Python SDK
 from llama_stack_client import LlamaStackClient
@@ -78,17 +78,15 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next

 ## Build, Configure, and Run Llama Stack

-1. **Build the Llama Stack**:
-Build the Llama Stack using the `starter` template:
+1. **Install Llama Stack and dependencies**:
 ```bash
-uv run --with llama-stack llama stack build --distro starter --image-type venv
+uv pip install llama-stack
+llama stack list-deps starter | xargs -L1 uv pip install
 ```
-**Expected Output:**
+2. **Start the distribution**:
 ```bash
-...
-Build Successful!
-You can find the newly-built template here: ~/.llama/distributions/starter/starter-run.yaml
-You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter
+llama stack run starter
 ```

 3. **Set the ENV variables by exporting them to the terminal**:
llama_stack/ui/package-lock.json (generated, 2647 changed lines): diff suppressed because it is too large.
@@ -43,16 +43,16 @@
 "@testing-library/dom": "^10.4.1",
 "@testing-library/jest-dom": "^6.8.0",
 "@testing-library/react": "^16.3.0",
-"@types/jest": "^29.5.14",
+"@types/jest": "^30.0.0",
 "@types/node": "^24",
 "@types/react": "^19",
 "@types/react-dom": "^19",
 "eslint": "^9",
-"eslint-config-next": "15.5.2",
+"eslint-config-next": "15.5.6",
 "eslint-config-prettier": "^10.1.8",
 "eslint-plugin-prettier": "^5.5.4",
-"jest": "^29.7.0",
-"jest-environment-jsdom": "^30.1.2",
+"jest": "^30.2.0",
+"jest-environment-jsdom": "^30.2.0",
 "prettier": "3.6.2",
 "tailwindcss": "^4",
 "ts-node": "^10.9.2",
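These are routine devDependency bumps for the UI (Jest 29 to 30, newer jsdom environment, and a patched eslint-config-next). To exercise the updated toolchain locally, something like the following should suffice, assuming the usual npm scripts are defined in `llama_stack/ui/package.json`:

```bash
# Reinstall dev dependencies and run the UI test suite
cd llama_stack/ui
npm install
npm test
```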
@@ -5,10 +5,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-[ -z "$BASH_VERSION" ] && {
-  echo "This script must be run with bash" >&2
-  exit 1
-}
+[ -z "${BASH_VERSION:-}" ] && exec /usr/bin/env bash "$0" "$@"
+if set -o | grep -Eq 'posix[[:space:]]+on'; then
+  exec /usr/bin/env bash "$0" "$@"
+fi

 set -Eeuo pipefail

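The rewritten guard re-executes the script under bash when it was started from another shell (where `BASH_VERSION` is unset) and also when bash is running in POSIX mode (for example when invoked as `sh`), instead of simply aborting. A standalone sketch of the same pattern, separate from this change:

```bash
#!/bin/sh
# Re-exec under bash if we are not already running in non-POSIX bash.
[ -z "${BASH_VERSION:-}" ] && exec /usr/bin/env bash "$0" "$@"
if set -o | grep -Eq 'posix[[:space:]]+on'; then
  exec /usr/bin/env bash "$0" "$@"
fi

# From here on, bash-only features are safe to use.
set -Eeuo pipefail
echo "Running under bash ${BASH_VERSION}"
```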
@@ -18,12 +18,110 @@ MODEL_ALIAS="llama3.2:3b"
 SERVER_IMAGE="docker.io/llamastack/distribution-starter:latest"
 WAIT_TIMEOUT=30
 TEMP_LOG=""
+WITH_TELEMETRY=true
+TELEMETRY_SERVICE_NAME="llama-stack"
+TELEMETRY_SINKS="otel_trace,otel_metric"
+OTEL_EXPORTER_OTLP_ENDPOINT="http://otel-collector:4318"
+TEMP_TELEMETRY_DIR=""
+
+materialize_telemetry_configs() {
+  local dest="$1"
+  mkdir -p "$dest"
+  local otel_cfg="${dest}/otel-collector-config.yaml"
+  local prom_cfg="${dest}/prometheus.yml"
+  local graf_cfg="${dest}/grafana-datasources.yaml"
+
+  for asset in "$otel_cfg" "$prom_cfg" "$graf_cfg"; do
+    if [ -e "$asset" ]; then
+      die "Telemetry asset ${asset} already exists; refusing to overwrite"
+    fi
+  done
+
+  cat <<'EOF' > "$otel_cfg"
+receivers:
+  otlp:
+    protocols:
+      grpc:
+        endpoint: 0.0.0.0:4317
+      http:
+        endpoint: 0.0.0.0:4318
+
+processors:
+  batch:
+    timeout: 1s
+    send_batch_size: 1024
+
+exporters:
+  # Export traces to Jaeger
+  otlp/jaeger:
+    endpoint: jaeger:4317
+    tls:
+      insecure: true
+
+  # Export metrics to Prometheus
+  prometheus:
+    endpoint: 0.0.0.0:9464
+    namespace: llama_stack
+
+  # Debug exporter for troubleshooting
+  debug:
+    verbosity: detailed
+
+service:
+  pipelines:
+    traces:
+      receivers: [otlp]
+      processors: [batch]
+      exporters: [otlp/jaeger, debug]
+
+    metrics:
+      receivers: [otlp]
+      processors: [batch]
+      exporters: [prometheus, debug]
+EOF
+
+  cat <<'EOF' > "$prom_cfg"
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+
+scrape_configs:
+  - job_name: 'prometheus'
+    static_configs:
+      - targets: ['localhost:9090']
+
+  - job_name: 'otel-collector'
+    static_configs:
+      - targets: ['otel-collector:9464']
+EOF
+
+  cat <<'EOF' > "$graf_cfg"
+apiVersion: 1
+
+datasources:
+  - name: Prometheus
+    type: prometheus
+    access: proxy
+    url: http://prometheus:9090
+    isDefault: true
+    editable: true
+
+  - name: Jaeger
+    type: jaeger
+    access: proxy
+    url: http://jaeger:16686
+    editable: true
+EOF
+}

 # Cleanup function to remove temporary files
 cleanup() {
   if [ -n "$TEMP_LOG" ] && [ -f "$TEMP_LOG" ]; then
     rm -f "$TEMP_LOG"
   fi
+  if [ -n "$TEMP_TELEMETRY_DIR" ] && [ -d "$TEMP_TELEMETRY_DIR" ]; then
+    rm -rf "$TEMP_TELEMETRY_DIR"
+  fi
 }

 # Set up trap to clean up on exit, error, or interrupt
@@ -32,7 +130,7 @@ trap cleanup EXIT ERR INT TERM
 log(){ printf "\e[1;32m%s\e[0m\n" "$*"; }
 die(){
   printf "\e[1;31m❌ %s\e[0m\n" "$*" >&2
-  printf "\e[1;31m🐛 Report an issue @ https://github.com/meta-llama/llama-stack/issues if you think it's a bug\e[0m\n" >&2
+  printf "\e[1;31m🐛 Report an issue @ https://github.com/llamastack/llama-stack/issues if you think it's a bug\e[0m\n" >&2
   exit 1
 }

@@ -89,6 +187,12 @@ Options:
   -m, --model MODEL       Model alias to use (default: ${MODEL_ALIAS})
   -i, --image IMAGE       Server image (default: ${SERVER_IMAGE})
   -t, --timeout SECONDS   Service wait timeout in seconds (default: ${WAIT_TIMEOUT})
+  --with-telemetry        Provision Jaeger, OTEL Collector, Prometheus, and Grafana (default: enabled)
+  --no-telemetry, --without-telemetry
+                          Skip provisioning the telemetry stack
+  --telemetry-service NAME   Service name reported to telemetry (default: ${TELEMETRY_SERVICE_NAME})
+  --telemetry-sinks SINKS    Comma-separated telemetry sinks (default: ${TELEMETRY_SINKS})
+  --otel-endpoint URL        OTLP endpoint provided to Llama Stack (default: ${OTEL_EXPORTER_OTLP_ENDPOINT})
   -h, --help              Show this help message

 For more information:
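With these options the installer can be launched with or without the observability containers. For example, from a checkout of the repository (these invocations simply combine the documented flags):

```bash
# Default behaviour: provision Jaeger, OTEL Collector, Prometheus, and Grafana
./scripts/install.sh

# Skip the telemetry stack entirely
./scripts/install.sh --no-telemetry

# Customize the service name and OTLP endpoint reported to Llama Stack
./scripts/install.sh --telemetry-service my-stack --otel-endpoint http://otel-collector:4318
```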
@@ -127,6 +231,26 @@ while [[ $# -gt 0 ]]; do
       WAIT_TIMEOUT="$2"
       shift 2
       ;;
+    --with-telemetry)
+      WITH_TELEMETRY=true
+      shift
+      ;;
+    --no-telemetry|--without-telemetry)
+      WITH_TELEMETRY=false
+      shift
+      ;;
+    --telemetry-service)
+      TELEMETRY_SERVICE_NAME="$2"
+      shift 2
+      ;;
+    --telemetry-sinks)
+      TELEMETRY_SINKS="$2"
+      shift 2
+      ;;
+    --otel-endpoint)
+      OTEL_EXPORTER_OTLP_ENDPOINT="$2"
+      shift 2
+      ;;
     *)
       die "Unknown option: $1"
       ;;
@@ -171,7 +295,11 @@ if [ "$ENGINE" = "podman" ] && [ "$(uname -s)" = "Darwin" ]; then
 fi

 # Clean up any leftovers from earlier runs
-for name in ollama-server llama-stack; do
+containers=(ollama-server llama-stack)
+if [ "$WITH_TELEMETRY" = true ]; then
+  containers+=(jaeger otel-collector prometheus grafana)
+fi
+for name in "${containers[@]}"; do
   ids=$($ENGINE ps -aq --filter "name=^${name}$")
   if [ -n "$ids" ]; then
     log "⚠️ Found existing container(s) for '${name}', removing..."
@@ -191,6 +319,64 @@ if ! $ENGINE network inspect llama-net >/dev/null 2>&1; then
   fi
 fi

+###############################################################################
+# Telemetry Stack
+###############################################################################
+if [ "$WITH_TELEMETRY" = true ]; then
+  TEMP_TELEMETRY_DIR="$(mktemp -d)"
+  TELEMETRY_ASSETS_DIR="$TEMP_TELEMETRY_DIR"
+  log "🧰 Materializing telemetry configs..."
+  materialize_telemetry_configs "$TELEMETRY_ASSETS_DIR"
+
+  log "📡 Starting telemetry stack..."
+
+  if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name jaeger \
+    --network llama-net \
+    -e COLLECTOR_ZIPKIN_HOST_PORT=:9411 \
+    -p 16686:16686 \
+    -p 14250:14250 \
+    -p 9411:9411 \
+    docker.io/jaegertracing/all-in-one:latest > /dev/null 2>&1; then
+    die "Jaeger startup failed"
+  fi
+
+  if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name otel-collector \
+    --network llama-net \
+    -p 4318:4318 \
+    -p 4317:4317 \
+    -p 9464:9464 \
+    -p 13133:13133 \
+    -v "${TELEMETRY_ASSETS_DIR}/otel-collector-config.yaml:/etc/otel-collector-config.yaml:Z" \
+    docker.io/otel/opentelemetry-collector-contrib:latest \
+    --config /etc/otel-collector-config.yaml > /dev/null 2>&1; then
+    die "OpenTelemetry Collector startup failed"
+  fi
+
+  if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name prometheus \
+    --network llama-net \
+    -p 9090:9090 \
+    -v "${TELEMETRY_ASSETS_DIR}/prometheus.yml:/etc/prometheus/prometheus.yml:Z" \
+    docker.io/prom/prometheus:latest \
+    --config.file=/etc/prometheus/prometheus.yml \
+    --storage.tsdb.path=/prometheus \
+    --web.console.libraries=/etc/prometheus/console_libraries \
+    --web.console.templates=/etc/prometheus/consoles \
+    --storage.tsdb.retention.time=200h \
+    --web.enable-lifecycle > /dev/null 2>&1; then
+    die "Prometheus startup failed"
+  fi
+
+  if ! execute_with_log $ENGINE run -d "${PLATFORM_OPTS[@]}" --name grafana \
+    --network llama-net \
+    -p 3000:3000 \
+    -e GF_SECURITY_ADMIN_PASSWORD=admin \
+    -e GF_USERS_ALLOW_SIGN_UP=false \
+    -v "${TELEMETRY_ASSETS_DIR}/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \
+    docker.io/grafana/grafana:11.0.0 > /dev/null 2>&1; then
+    die "Grafana startup failed"
+  fi
+fi
+
 ###############################################################################
 # 1. Ollama
 ###############################################################################
@@ -218,9 +404,19 @@ fi
 ###############################################################################
 # 2. Llama‑Stack
 ###############################################################################
+server_env_opts=()
+if [ "$WITH_TELEMETRY" = true ]; then
+  server_env_opts+=(
+    -e TELEMETRY_SINKS="${TELEMETRY_SINKS}"
+    -e OTEL_EXPORTER_OTLP_ENDPOINT="${OTEL_EXPORTER_OTLP_ENDPOINT}"
+    -e OTEL_SERVICE_NAME="${TELEMETRY_SERVICE_NAME}"
+  )
+fi
+
 cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \
   --network llama-net \
   -p "${PORT}:${PORT}" \
+  "${server_env_opts[@]}" \
   -e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \
   "${SERVER_IMAGE}" --port "${PORT}")

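Since the telemetry settings reach the server purely as environment variables, one way (outside this change) to confirm they were applied is to inspect the running container's environment:

```bash
# Replace docker with podman if that is the engine the installer selected
docker exec llama-stack env | grep -E 'TELEMETRY_SINKS|OTEL_'
```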
@@ -244,5 +440,12 @@ log "👉 API endpoint: http://localhost:${PORT}"
 log "📖 Documentation: https://llamastack.github.io/latest/references/api_reference/index.html"
 log "💻 To access the llama stack CLI, exec into the container:"
 log "   $ENGINE exec -ti llama-stack bash"
+if [ "$WITH_TELEMETRY" = true ]; then
+  log "📡 Telemetry dashboards:"
+  log "   Jaeger UI: http://localhost:16686"
+  log "   Prometheus UI: http://localhost:9090"
+  log "   Grafana UI: http://localhost:3000 (admin/admin)"
+  log "   OTEL Collector: http://localhost:4318"
+fi
 log "🐛 Report an issue @ https://github.com/llamastack/llama-stack/issues if you think it's a bug"
 log ""
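A quick way to confirm the telemetry stack is healthy after the script finishes (not part of the script itself; the ports are the ones published above):

```bash
# The collector's Prometheus exporter serves /metrics on the published port;
# llama_stack-namespaced metrics appear once the server starts emitting them
curl -s http://localhost:9464/metrics | head

# Prometheus should report both of its scrape targets
curl -s http://localhost:9090/api/v1/targets | python3 -m json.tool | head -40
```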