Merge branch 'main' into milvus-files-api

Commit c4fa7ab978: Francisco Arceo, 2025-07-03 14:03:51 -04:00, committed by GitHub
74 changed files with 1001 additions and 348 deletions

View file

@ -25,7 +25,7 @@ jobs:
# Listing tests manually since some of them currently fail
# TODO: generate matrix list from tests/integration when fixed
test-type: [agents, inference, datasets, inspect, scoring, post_training, providers, tool_runtime, vector_io]
client-type: [library, http]
client-type: [library, server]
python-version: ["3.12", "3.13"]
fail-fast: false # we want to run all tests regardless of failure
@ -45,39 +45,6 @@ jobs:
run: |
uv run llama stack build --template ollama --image-type venv
- name: Start Llama Stack server in background
if: matrix.client-type == 'http'
env:
INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
run: |
LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv --env OLLAMA_URL="http://0.0.0.0:11434" &
- name: Wait for Llama Stack server to be ready
if: matrix.client-type == 'http'
run: |
echo "Waiting for Llama Stack server..."
for i in {1..30}; do
if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
echo "Llama Stack server is up!"
exit 0
fi
sleep 1
done
echo "Llama Stack server failed to start"
cat server.log
exit 1
- name: Verify Ollama status is OK
if: matrix.client-type == 'http'
run: |
echo "Verifying Ollama status..."
ollama_status=$(curl -s -L http://127.0.0.1:8321/v1/providers/ollama|jq --raw-output .health.status)
echo "Ollama status: $ollama_status"
if [ "$ollama_status" != "OK" ]; then
echo "Ollama health check failed"
exit 1
fi
- name: Check Storage and Memory Available Before Tests
if: ${{ always() }}
run: |
@ -92,12 +59,14 @@ jobs:
if [ "${{ matrix.client-type }}" == "library" ]; then
stack_config="ollama"
else
stack_config="http://localhost:8321"
stack_config="server:ollama"
fi
uv run pytest -s -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \
-k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
--text-model="meta-llama/Llama-3.2-3B-Instruct" \
--embedding-model=all-MiniLM-L6-v2
--embedding-model=all-MiniLM-L6-v2 \
--color=yes \
--capture=tee-sys | tee pytest-${{ matrix.test-type }}.log
- name: Check Storage and Memory Available After Tests
if: ${{ always() }}

View file

@ -35,6 +35,8 @@ pip install llama-stack-client
### CLI
```bash
# Run a chat completion
MODEL="Llama-4-Scout-17B-16E-Instruct"
llama-stack-client --endpoint http://localhost:8321 \
inference chat-completion \
--model-id meta-llama/$MODEL \
@ -106,46 +108,59 @@ By reducing friction and complexity, Llama Stack empowers developers to focus on
### API Providers
Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack.
Please check out the [full list](https://llama-stack.readthedocs.io/en/latest/providers/index.html).
| **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** | **Post Training** |
|:------------------------:|:----------------------:|:----------:|:-------------:|:----------:|:----------:|:-------------:|:-----------------:|
| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | |
| SambaNova | Hosted | | ✅ | | ✅ | | |
| Cerebras | Hosted | | ✅ | | | | |
| Fireworks | Hosted | ✅ | ✅ | ✅ | | | |
| AWS Bedrock | Hosted | | ✅ | | ✅ | | |
| Together | Hosted | ✅ | ✅ | | ✅ | | |
| Groq | Hosted | | ✅ | | | | |
| Ollama | Single Node | | ✅ | | | | |
| TGI | Hosted and Single Node | | ✅ | | | | |
| NVIDIA NIM | Hosted and Single Node | | ✅ | | | | |
| Chroma | Single Node | | | ✅ | | | |
| PG Vector | Single Node | | | ✅ | | | |
| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | |
| vLLM | Hosted and Single Node | | ✅ | | | | |
| OpenAI | Hosted | | ✅ | | | | |
| Anthropic | Hosted | | ✅ | | | | |
| Gemini | Hosted | | ✅ | | | | |
| watsonx | Hosted | | ✅ | | | | |
| HuggingFace | Single Node | | | | | | ✅ |
| TorchTune | Single Node | | | | | | ✅ |
| NVIDIA NEMO | Hosted | | | | | | ✅ |
| API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Telemetry | Post Training | Eval | DatasetIO |
|:-------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:--------:|
| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| SambaNova | Hosted | | ✅ | | ✅ | | | | |
| Cerebras | Hosted | | ✅ | | | | | | |
| Fireworks | Hosted | ✅ | ✅ | ✅ | | | | | |
| AWS Bedrock | Hosted | | ✅ | | ✅ | | | | |
| Together | Hosted | ✅ | ✅ | | ✅ | | | | |
| Groq | Hosted | | ✅ | | | | | | |
| Ollama | Single Node | | ✅ | | | | | | |
| TGI | Hosted/Single Node | | ✅ | | | | | | |
| NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | | |
| ChromaDB | Hosted/Single Node | | | ✅ | | | | | |
| PG Vector | Single Node | | | ✅ | | | | | |
| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | | |
| vLLM | Single Node | | ✅ | | | | | | |
| OpenAI | Hosted | | ✅ | | | | | | |
| Anthropic | Hosted | | ✅ | | | | | | |
| Gemini | Hosted | | ✅ | | | | | | |
| WatsonX | Hosted | | ✅ | | | | | | |
| HuggingFace | Single Node | | | | | | ✅ | | ✅ |
| TorchTune | Single Node | | | | | | ✅ | | |
| NVIDIA NEMO | Hosted | | ✅ | ✅ | | | ✅ | ✅ | ✅ |
| NVIDIA | Hosted | | | | | | ✅ | ✅ | ✅ |
> **Note**: Additional providers are available through external packages. See [External Providers](https://llama-stack.readthedocs.io/en/latest/providers/external.html) documentation.
### Distributions
A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (eg. ollama) and seamlessly transition to production (eg. Fireworks) without changing your application code. Here are some of the distributions we support:
A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (e.g., ollama) and seamlessly transition to production (e.g., Fireworks) without changing your application code.
Here are some of the distributions we support:
| **Distribution** | **Llama Stack Docker** | Start This Distribution |
|:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:|
| Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-gpu.html) |
| SambaNova | [llamastack/distribution-sambanova](https://hub.docker.com/repository/docker/llamastack/distribution-sambanova/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/sambanova.html) |
| Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) |
| TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/tgi.html) |
| vLLM | [llamastack/distribution-remote-vllm](https://hub.docker.com/repository/docker/llamastack/distribution-remote-vllm/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/remote-vllm.html) |
| Starter | [llamastack/distribution-starter](https://hub.docker.com/repository/docker/llamastack/distribution-starter/general) | |
| PostgreSQL | [llamastack/distribution-postgres-demo](https://hub.docker.com/repository/docker/llamastack/distribution-postgres-demo/general) | |
Here are the ones that are out of support scope but still available on Docker Hub:
| **Distribution** | **Llama Stack Docker** | Start This Distribution |
|:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:|
| Ollama | [llamastack/distribution-ollama](https://hub.docker.com/repository/docker/llamastack/distribution-ollama/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/ollama.html) |
| TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/tgi.html) |
| Together | [llamastack/distribution-together](https://hub.docker.com/repository/docker/llamastack/distribution-together/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/together.html) |
| Fireworks | [llamastack/distribution-fireworks](https://hub.docker.com/repository/docker/llamastack/distribution-fireworks/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/fireworks.html) |
| vLLM | [llamastack/distribution-remote-vllm](https://hub.docker.com/repository/docker/llamastack/distribution-remote-vllm/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/remote-vllm.html) |
| AWS Bedrock | [llamastack/distribution-bedrock](https://hub.docker.com/repository/docker/llamastack/distribution-bedrock/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/bedrock.html) |
| SambaNova | [llamastack/distribution-sambanova](https://hub.docker.com/repository/docker/llamastack/distribution-sambanova/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/sambanova.html) |
| Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) |
### Documentation

View file

@ -17,7 +17,9 @@
"\n",
"Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n",
"\n",
"In this guide, we will showcase how you can build LLM-powered agentic applications using Llama Stack.\n"
"In this guide, we will showcase how you can build LLM-powered agentic applications using Llama Stack.\n",
"\n",
"**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. It provides a streamlined experience for getting up and running in just a few steps.\n"
]
},
{

View file

@ -17,7 +17,9 @@
"\n",
"Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n",
"\n",
"In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n"
"In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n",
"\n",
"**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. It provides a streamlined experience for getting up and running in just a few steps.\n"
]
},
{

View file

@ -17,7 +17,9 @@
"\n",
"Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n",
"\n",
"In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n"
"In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n",
"\n",
"**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. It provides a streamlined experience for getting up and running in just a few steps.\n"
]
},
{

docs/quick_start.ipynb (new file, 367 lines)
View file

@ -0,0 +1,367 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "c1e7571c",
"metadata": {
"id": "c1e7571c"
},
"source": [
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)\n",
"\n",
"# Llama Stack - Building AI Applications\n",
"\n",
"<img src=\"https://llama-stack.readthedocs.io/en/latest/_images/llama-stack.png\" alt=\"drawing\" width=\"500\"/>\n",
"\n",
"Get started with Llama Stack in minutes!\n",
"\n",
"[Llama Stack](https://github.com/meta-llama/llama-stack) is a stateful service with REST APIs to support the seamless transition of AI applications across different environments. You can build and test using a local server first and deploy to a hosted endpoint for production.\n",
"\n",
"In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/)\n",
"as the inference [provider](docs/source/providers/index.md#inference) for a Llama Model.\n"
]
},
{
"cell_type": "markdown",
"id": "4CV1Q19BDMVw",
"metadata": {
"id": "4CV1Q19BDMVw"
},
"source": [
"## Step 1: Install and setup"
]
},
{
"cell_type": "markdown",
"id": "K4AvfUAJZOeS",
"metadata": {
"id": "K4AvfUAJZOeS"
},
"source": [
"### 1.1. Install uv and test inference with Ollama\n",
"\n",
"We'll install [uv](https://docs.astral.sh/uv/) to setup the Python virtual environment, along with [colab-xterm](https://github.com/InfuseAI/colab-xterm) for running command-line tools, and [Ollama](https://ollama.com/download) as the inference provider."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a2d7b85",
"metadata": {},
"outputs": [],
"source": [
"%pip install uv llama_stack llama-stack-client\n",
"\n",
"## If running on Collab:\n",
"# !pip install colab-xterm\n",
"# %load_ext colabxterm\n",
"\n",
"!curl https://ollama.ai/install.sh | sh"
]
},
{
"cell_type": "markdown",
"id": "39fa584b",
"metadata": {},
"source": [
"### 1.2. Test inference with Ollama"
]
},
{
"cell_type": "markdown",
"id": "3bf81522",
"metadata": {},
"source": [
"Well now launch a terminal and run inference on a Llama model with Ollama to verify that the model is working correctly."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a7e8e0f1",
"metadata": {},
"outputs": [],
"source": [
"## If running on Colab:\n",
"# %xterm\n",
"\n",
"## To be ran in the terminal:\n",
"# ollama serve &\n",
"# ollama run llama3.2:3b --keepalive 60m"
]
},
{
"cell_type": "markdown",
"id": "f3c5f243",
"metadata": {},
"source": [
"If successful, you should see the model respond to a prompt.\n",
"\n",
"...\n",
"```\n",
">>> hi\n",
"Hello! How can I assist you today?\n",
"```"
]
},
{
"cell_type": "markdown",
"id": "oDUB7M_qe-Gs",
"metadata": {
"id": "oDUB7M_qe-Gs"
},
"source": [
"## Step 2: Run the Llama Stack server\n",
"\n",
"In this showcase, we will start a Llama Stack server that is running locally."
]
},
{
"cell_type": "markdown",
"id": "732eadc6",
"metadata": {},
"source": [
"### 2.1. Setup the Llama Stack Server"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "J2kGed0R5PSf",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"collapsed": true,
"id": "J2kGed0R5PSf",
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
},
"outputs": [],
"source": [
"import os \n",
"import subprocess\n",
"\n",
"if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
"\n",
"# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
"!uv run --with llama-stack llama stack build --template ollama --image-type venv --image-name myvenv\n",
"\n",
"def run_llama_stack_server_background():\n",
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
" process = subprocess.Popen(\n",
" f\"uv run --with llama-stack llama stack run ollama --image-type venv --image-name myvenv --env INFERENCE_MODEL=llama3.2:3b\",\n",
" shell=True,\n",
" stdout=log_file,\n",
" stderr=log_file,\n",
" text=True\n",
" )\n",
" \n",
" print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
" return process\n",
"\n",
"def wait_for_server_to_start():\n",
" import requests\n",
" from requests.exceptions import ConnectionError\n",
" import time\n",
" \n",
" url = \"http://0.0.0.0:8321/v1/health\"\n",
" max_retries = 30\n",
" retry_interval = 1\n",
" \n",
" print(\"Waiting for server to start\", end=\"\")\n",
" for _ in range(max_retries):\n",
" try:\n",
" response = requests.get(url)\n",
" if response.status_code == 200:\n",
" print(\"\\nServer is ready!\")\n",
" return True\n",
" except ConnectionError:\n",
" print(\".\", end=\"\", flush=True)\n",
" time.sleep(retry_interval)\n",
" \n",
" print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
" return False\n",
"\n",
"\n",
"# use this helper if needed to kill the server \n",
"def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes\n",
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
]
},
{
"cell_type": "markdown",
"id": "c40e9efd",
"metadata": {},
"source": [
"### 2.2. Start the Llama Stack Server"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "f779283d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting Llama Stack server with PID: 787100\n",
"Waiting for server to start\n",
"Server is ready!\n"
]
}
],
"source": [
"server_process = run_llama_stack_server_background()\n",
"assert wait_for_server_to_start()"
]
},
{
"cell_type": "markdown",
"id": "28477c03",
"metadata": {},
"source": [
"## Step 3: Run the demo"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "7da71011",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"rag_tool> Ingesting document: https://www.paulgraham.com/greatwork.html\n",
"prompt> How do you do great work?\n",
"\u001b[33minference> \u001b[0m\u001b[33m[k\u001b[0m\u001b[33mnowledge\u001b[0m\u001b[33m_search\u001b[0m\u001b[33m(query\u001b[0m\u001b[33m=\"\u001b[0m\u001b[33mWhat\u001b[0m\u001b[33m is\u001b[0m\u001b[33m the\u001b[0m\u001b[33m key\u001b[0m\u001b[33m to\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m\")]\u001b[0m\u001b[97m\u001b[0m\n",
"\u001b[32mtool_execution> Tool:knowledge_search Args:{'query': 'What is the key to doing great work'}\u001b[0m\n",
"\u001b[32mtool_execution> Tool:knowledge_search Response:[TextContentItem(text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n', type='text'), TextContentItem(text=\"Result 1:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 2:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 3:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 4:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 5:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text='END of knowledge_search tool results.\\n', type='text'), TextContentItem(text='The above results were retrieved to help answer the user\\'s query: \"What is the key to doing great work\". Use them as supporting information only in answering this query.\\n', type='text')]\u001b[0m\n",
"\u001b[33minference> \u001b[0m\u001b[33mDoing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m means\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m something\u001b[0m\u001b[33m important\u001b[0m\u001b[33m so\u001b[0m\u001b[33m well\u001b[0m\u001b[33m that\u001b[0m\u001b[33m you\u001b[0m\u001b[33m expand\u001b[0m\u001b[33m people\u001b[0m\u001b[33m's\u001b[0m\u001b[33m ideas\u001b[0m\u001b[33m of\u001b[0m\u001b[33m what\u001b[0m\u001b[33m's\u001b[0m\u001b[33m possible\u001b[0m\u001b[33m.\u001b[0m\u001b[33m However\u001b[0m\u001b[33m,\u001b[0m\u001b[33m there\u001b[0m\u001b[33m's\u001b[0m\u001b[33m no\u001b[0m\u001b[33m threshold\u001b[0m\u001b[33m for\u001b[0m\u001b[33m importance\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m's\u001b[0m\u001b[33m often\u001b[0m\u001b[33m hard\u001b[0m\u001b[33m to\u001b[0m\u001b[33m judge\u001b[0m\u001b[33m at\u001b[0m\u001b[33m the\u001b[0m\u001b[33m time\u001b[0m\u001b[33m anyway\u001b[0m\u001b[33m.\u001b[0m\u001b[33m Great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m matter\u001b[0m\u001b[33m of\u001b[0m\u001b[33m degree\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m can\u001b[0m\u001b[33m be\u001b[0m\u001b[33m difficult\u001b[0m\u001b[33m to\u001b[0m\u001b[33m determine\u001b[0m\u001b[33m whether\u001b[0m\u001b[33m someone\u001b[0m\u001b[33m has\u001b[0m\u001b[33m done\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m until\u001b[0m\u001b[33m after\u001b[0m\u001b[33m the\u001b[0m\u001b[33m fact\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n",
"\u001b[30m\u001b[0m"
]
}
],
"source": [
"from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient\n",
"\n",
"vector_db_id = \"my_demo_vector_db\"\n",
"client = LlamaStackClient(base_url=\"http://0.0.0.0:8321\")\n",
"\n",
"models = client.models.list()\n",
"\n",
"# Select the first LLM and first embedding models\n",
"model_id = next(m for m in models if m.model_type == \"llm\").identifier\n",
"embedding_model_id = (\n",
" em := next(m for m in models if m.model_type == \"embedding\")\n",
").identifier\n",
"embedding_dimension = em.metadata[\"embedding_dimension\"]\n",
"\n",
"_ = client.vector_dbs.register(\n",
" vector_db_id=vector_db_id,\n",
" embedding_model=embedding_model_id,\n",
" embedding_dimension=embedding_dimension,\n",
" provider_id=\"faiss\",\n",
")\n",
"source = \"https://www.paulgraham.com/greatwork.html\"\n",
"print(\"rag_tool> Ingesting document:\", source)\n",
"document = RAGDocument(\n",
" document_id=\"document_1\",\n",
" content=source,\n",
" mime_type=\"text/html\",\n",
" metadata={},\n",
")\n",
"client.tool_runtime.rag_tool.insert(\n",
" documents=[document],\n",
" vector_db_id=vector_db_id,\n",
" chunk_size_in_tokens=50,\n",
")\n",
"agent = Agent(\n",
" client,\n",
" model=model_id,\n",
" instructions=\"You are a helpful assistant\",\n",
" tools=[\n",
" {\n",
" \"name\": \"builtin::rag/knowledge_search\",\n",
" \"args\": {\"vector_db_ids\": [vector_db_id]},\n",
" }\n",
" ],\n",
")\n",
"\n",
"prompt = \"How do you do great work?\"\n",
"print(\"prompt>\", prompt)\n",
"\n",
"response = agent.create_turn(\n",
" messages=[{\"role\": \"user\", \"content\": prompt}],\n",
" session_id=agent.create_session(\"rag_session\"),\n",
" stream=True,\n",
")\n",
"\n",
"for log in AgentEventLogger().log(response):\n",
" log.print()"
]
},
{
"cell_type": "markdown",
"id": "341aaadf",
"metadata": {},
"source": [
"Congratulations! You've successfully built your first RAG application using Llama Stack! 🎉🥳"
]
},
{
"cell_type": "markdown",
"id": "e88e1185",
"metadata": {},
"source": [
"## Next Steps"
]
},
{
"cell_type": "markdown",
"id": "bcb73600",
"metadata": {},
"source": [
"Now you're ready to dive deeper into Llama Stack!\n",
"- Explore the [Detailed Tutorial](./detailed_tutorial.md).\n",
"- Try the [Getting Started Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb).\n",
"- Browse more [Notebooks on GitHub](https://github.com/meta-llama/llama-stack/tree/main/docs/notebooks).\n",
"- Learn about Llama Stack [Concepts](../concepts/index.md).\n",
"- Discover how to [Build Llama Stacks](../distributions/index.md).\n",
"- Refer to our [References](../references/index.md) for details on the Llama CLI and Python SDK.\n",
"- Check out the [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repository for example applications and tutorials."
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -80,7 +80,7 @@ providers:
# provider_id is a string you can choose freely
- provider_id: ollama
# provider_type is a string that specifies the type of provider.
# in this case, the provider for inference is ollama and it is run remotely (outside of the distribution)
# in this case, the provider for inference is ollama and it runs remotely (outside of the distribution)
provider_type: remote::ollama
# config is a dictionary that contains the configuration for the provider.
# in this case, the configuration is the url of the ollama server
@ -88,7 +88,7 @@ providers:
url: ${env.OLLAMA_URL:=http://localhost:11434}
```
A few things to note:
- A _provider instance_ is identified with an (id, type, configuration) triplet.
- A _provider instance_ is identified with an (id, type, config) triplet.
- The id is a string you can choose freely.
- You can instantiate any number of provider instances of the same type.
- The configuration dictionary is provider-specific.
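To make the triplet concrete, here is a minimal sketch of two provider instances of the same type; the second instance id and its URL are hypothetical:
```yaml
inference:
  # two instances of the same provider type, distinguished only by provider_id
  - provider_id: ollama
    provider_type: remote::ollama
    config:
      url: ${env.OLLAMA_URL:=http://localhost:11434}
  - provider_id: ollama-secondary   # hypothetical second instance
    provider_type: remote::ollama
    config:
      url: ${env.OLLAMA_SECONDARY_URL:=http://localhost:11435}
```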
@ -187,7 +187,7 @@ The environment variable substitution system is type-safe:
## Resources
Finally, let's look at the `models` section:
Let's look at the `models` section:
```yaml
models:
@ -195,8 +195,9 @@ models:
model_id: ${env.INFERENCE_MODEL}
provider_id: ollama
provider_model_id: null
model_type: llm
```
A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage the clients to always register models before using them, some Stack servers may come up a list of "already known and available" models.
A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage clients to register models before using them, some Stack servers may come up with a list of "already known and available" models.
What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`.
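A minimal sketch of the renaming described above, using the same fields as the `models` entry shown earlier (the values are illustrative):
```yaml
models:
  - model_id: image_captioning_model        # the identifier you use in Stack interactions
    provider_id: ollama
    provider_model_id: llama3.2:vision-11b  # the identifier in the provider's model catalog
    model_type: llm
```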

View file

@ -8,6 +8,8 @@ environments. You can build and test using a local server first and deploy to a
In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/)
as the inference [provider](../providers/inference/index) for a Llama Model.
**💡 Notebook Version:** You can also follow this quickstart guide in a Jupyter notebook format: [quick_start.ipynb](https://github.com/meta-llama/llama-stack/blob/main/docs/quick_start.ipynb)
#### Step 1: Install and setup
1. Install [uv](https://docs.astral.sh/uv/)
2. Run inference on a Llama model with [Ollama](https://ollama.com/download)
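A minimal command sketch for these two steps, assuming a Unix-like shell with `curl` available (the uv installer URL is the one documented by Astral; the model tag matches the quickstart notebook):
```bash
# Step 1: install uv (see https://docs.astral.sh/uv/ for other install methods)
curl -LsSf https://astral.sh/uv/install.sh | sh

# Step 2: pull a Llama model and verify inference with Ollama
ollama run llama3.2:3b --keepalive 60m
```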

View file

@ -73,17 +73,26 @@ A number of "adapters" are available for some popular Inference and Vector Store
| OpenAI | Hosted |
| Anthropic | Hosted |
| Gemini | Hosted |
| WatsonX | Hosted |
**Agents API**
| **Provider** | **Environments** |
| :----: | :----: |
| Meta Reference | Single Node |
| Fireworks | Hosted |
| Together | Hosted |
| PyTorch ExecuTorch | On-device iOS |
**Vector IO API**
| **Provider** | **Environments** |
| :----: | :----: |
| FAISS | Single Node |
| SQLite-Vec| Single Node |
| SQLite-Vec | Single Node |
| Chroma | Hosted and Single Node |
| Milvus | Hosted and Single Node |
| Postgres (PGVector) | Hosted and Single Node |
| Weaviate | Hosted |
| Qdrant | Hosted and Single Node |
**Safety API**
| **Provider** | **Environments** |
@ -93,6 +102,30 @@ A number of "adapters" are available for some popular Inference and Vector Store
| Code Scanner | Single Node |
| AWS Bedrock | Hosted |
**Post Training API**
| **Provider** | **Environments** |
| :----: | :----: |
| Meta Reference | Single Node |
| HuggingFace | Single Node |
| TorchTune | Single Node |
| NVIDIA NEMO | Hosted |
**Eval API**
| **Provider** | **Environments** |
| :----: | :----: |
| Meta Reference | Single Node |
| NVIDIA NEMO | Hosted |
**Telemetry API**
| **Provider** | **Environments** |
| :----: | :----: |
| Meta Reference | Single Node |
**Tool Runtime API**
| **Provider** | **Environments** |
| :----: | :----: |
| Brave Search | Hosted |
| RAG Runtime | Single Node |
```{toctree}
:hidden:

View file

@ -16,7 +16,6 @@ Meta's reference implementation of an agent system that can use tools, access ve
```yaml
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db
responses_store:
type: sqlite

View file

@ -15,7 +15,6 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to
```yaml
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db
```

View file

@ -15,7 +15,6 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi
```yaml
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db
```

View file

@ -15,7 +15,6 @@ Meta's reference implementation of evaluation tasks with support for multiple la
```yaml
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db
```

View file

@ -1,9 +1,10 @@
# Providers Overview
The goal of Llama Stack is to build an ecosystem where users can easily swap out different implementations for the same API. Examples for these include:
- LLM inference providers (e.g., Ollama, Fireworks, Together, AWS Bedrock, Groq, Cerebras, SambaNova, vLLM, etc.),
- Vector databases (e.g., ChromaDB, Weaviate, Qdrant, Milvus, FAISS, PGVector, SQLite-Vec, etc.),
- Safety providers (e.g., Meta's Llama Guard, AWS Bedrock Guardrails, etc.)
- LLM inference providers (e.g., Meta Reference, Ollama, Fireworks, Together, AWS Bedrock, Groq, Cerebras, SambaNova, vLLM, OpenAI, Anthropic, Gemini, WatsonX, etc.),
- Vector databases (e.g., FAISS, SQLite-Vec, ChromaDB, Weaviate, Qdrant, Milvus, PGVector, etc.),
- Safety providers (e.g., Meta's Llama Guard, Prompt Guard, Code Scanner, AWS Bedrock Guardrails, etc.),
- Tool Runtime providers (e.g., RAG Runtime, Brave Search, etc.)
Providers come in two flavors:
- **Remote**: the provider runs as a separate service external to the Llama Stack codebase. Llama Stack contains a small amount of adapter code.

View file

@ -44,7 +44,6 @@ more details about Faiss in general.
```yaml
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
```

View file

@ -15,7 +15,6 @@ Meta's reference implementation of a vector database.
```yaml
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
```

View file

@ -19,7 +19,6 @@ Please refer to the remote provider documentation.
db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy}/milvus.db
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_registry.db
```

View file

@ -36,7 +36,7 @@
"from dotenv import load_dotenv\n",
"from llama_stack_client import LlamaStackClient\n",
"from llama_stack_client.lib.agents.agent import Agent\n",
"from llama_stack_client.lib.agents.custom_tool import CustomTool\n",
"from llama_stack_client.lib.agents.client_tool import ClientTool\n",
"from llama_stack_client.lib.agents.event_logger import EventLogger\n",
"from llama_stack_client.types import CompletionMessage\n",
"from llama_stack_client.types.agent_create_params import AgentConfig\n",
@ -129,7 +129,7 @@
"source": [
"## Step 3: Create a Custom Tool Class\n",
"\n",
"Here, we defines the `WebSearchTool` class, which extends `CustomTool` to integrate the Brave Search API with Llama Stack, enabling web search capabilities within AI workflows. The class handles incoming user queries, interacts with the `BraveSearch` class for data retrieval, and formats results for effective response generation."
"Here, we defines the `WebSearchTool` class, which extends `ClientTool` to integrate the Brave Search API with Llama Stack, enabling web search capabilities within AI workflows. The class handles incoming user queries, interacts with the `BraveSearch` class for data retrieval, and formats results for effective response generation."
]
},
{
@ -139,7 +139,7 @@
"metadata": {},
"outputs": [],
"source": [
"class WebSearchTool(CustomTool):\n",
"class WebSearchTool(ClientTool):\n",
" def __init__(self, api_key: str):\n",
" self.api_key = api_key\n",
" self.engine = BraveSearch(api_key)\n",

View file

@ -45,7 +45,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
@ -65,7 +65,7 @@
"from dotenv import load_dotenv\n",
"\n",
"load_dotenv()\n",
"BRAVE_SEARCH_API_KEY = os.environ[\"BRAVE_SEARCH_API_KEY\"]\n"
"TAVILY_SEARCH_API_KEY = os.environ[\"TAVILY_SEARCH_API_KEY\"]\n"
]
},
{
@ -110,10 +110,17 @@
"from llama_stack_client import LlamaStackClient\n",
"from llama_stack_client.lib.agents.agent import Agent\n",
"from llama_stack_client.lib.agents.event_logger import EventLogger\n",
"from llama_stack_client.types import UserMessage\n",
"from typing import cast, Iterator\n",
"\n",
"\n",
"async def agent_example():\n",
" client = LlamaStackClient(base_url=f\"http://{HOST}:{PORT}\")\n",
" client = LlamaStackClient(\n",
" base_url=f\"http://{HOST}:{PORT}\",\n",
" provider_data={\n",
" \"tavily_search_api_key\": TAVILY_SEARCH_API_KEY,\n",
" }\n",
" )\n",
" agent = Agent(\n",
" client,\n",
" model=MODEL_NAME,\n",
@ -123,13 +130,7 @@
" \"type\": \"greedy\",\n",
" },\n",
" },\n",
" tools=[\n",
" {\n",
" \"type\": \"brave_search\",\n",
" \"engine\": \"brave\",\n",
" \"api_key\": BRAVE_SEARCH_API_KEY,\n",
" }\n",
" ],\n",
" tools=[\"builtin::websearch\"],\n",
" )\n",
" session_id = agent.create_session(\"test-session\")\n",
" print(f\"Created session_id={session_id} for Agent({agent.agent_id})\")\n",
@ -142,15 +143,13 @@
" for prompt in user_prompts:\n",
" response = agent.create_turn(\n",
" messages=[\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": prompt,\n",
" }\n",
" UserMessage(role=\"user\", content=prompt)\n",
" ],\n",
" session_id=session_id,\n",
" stream=True,\n",
" )\n",
"\n",
" async for log in EventLogger().log(response):\n",
" for log in EventLogger().log(cast(Iterator, response)):\n",
" log.print()\n",
"\n",
"\n",

View file

@ -2,9 +2,9 @@
Llama Stack defines and standardizes the set of core building blocks needed to bring generative AI applications to market. These building blocks are presented in the form of interoperable APIs with a broad set of Providers providing their implementations. These building blocks are assembled into Distributions which are easy for developers to get from zero to production.
This guide will walk you through an end-to-end workflow with Llama Stack with Ollama as the inference provider and ChromaDB as the memory provider. Please note the steps for configuring your provider and distribution will vary a little depending on the services you use. However, the user experience will remain universal - this is the power of Llama-Stack.
This guide will walk you through an end-to-end workflow with Llama Stack with Ollama as the inference provider and ChromaDB as the VectorIO provider. Please note the steps for configuring your provider and distribution will vary depending on the services you use. However, the user experience will remain universal - this is the power of Llama-Stack.
If you're looking for more specific topics, we have a [Zero to Hero Guide](#next-steps) that covers everything from Tool Calling to Agents in detail. Feel free to skip to the end to explore the advanced topics you're interested in.
If you're looking for more specific topics, we have a [Zero to Hero Guide](#next-steps) that covers everything from 'Tool Calling' to 'Agents' in detail. Feel free to skip to the end to explore the advanced topics you're interested in.
> If you'd prefer not to set up a local server, explore our notebook on [tool calling with the Together API](Tool_Calling101_Using_Together_Llama_Stack_Server.ipynb). This notebook will show you how to leverage together.ai's Llama Stack Server API, allowing you to get started with Llama Stack without the need for a locally built and running server.
@ -26,15 +26,15 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
- Follow instructions based on the OS you are on. For example, if you are on a Mac, download and unzip `Ollama-darwin.zip`.
- Run the `Ollama` application.
1. **Download the Ollama CLI**:
2. **Download the Ollama CLI**:
Ensure you have the `ollama` command line tool by downloading and installing it from the same website.
1. **Start ollama server**:
3. **Start ollama server**:
Open the terminal and run:
```
```bash
ollama serve
```
1. **Run the model**:
4. **Run the model**:
Open the terminal and run:
```bash
ollama run llama3.2:3b-instruct-fp16 --keepalive -1m
@ -48,9 +48,9 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
## Install Dependencies and Set Up Environment
1. **Create a Conda Environment**:
Create a new Conda environment with Python 3.10:
Create a new Conda environment with Python 3.12:
```bash
conda create -n ollama python=3.10
conda create -n ollama python=3.12
```
Activate the environment:
```bash

View file

@ -101,7 +101,7 @@ class MetricInResponse(BaseModel):
# This is a short term solution to allow inference API to return metrics
# The ideal way to do this is to have a way for all response types to include metrics
# and all metric events logged to the telemetry API to be inlcuded with the response
# and all metric events logged to the telemetry API to be included with the response
# To do this, we will need to augment all response types with a metrics field.
# We have hit a blocker from stainless SDK that prevents us from doing this.
# The blocker is that if we were to augment the response types that have a data field

View file

@ -106,4 +106,26 @@ def is_action_allowed(
class AccessDeniedError(RuntimeError):
pass
def __init__(self, action: str | None = None, resource: ProtectedResource | None = None, user: User | None = None):
self.action = action
self.resource = resource
self.user = user
message = _build_access_denied_message(action, resource, user)
super().__init__(message)
def _build_access_denied_message(action: str | None, resource: ProtectedResource | None, user: User | None) -> str:
"""Build detailed error message for access denied scenarios."""
if action and resource and user:
resource_info = f"{resource.type}::{resource.identifier}"
user_info = f"'{user.principal}'"
if user.attributes:
attrs = ", ".join([f"{k}={v}" for k, v in user.attributes.items()])
user_info += f" (attributes: {attrs})"
message = f"User {user_info} cannot perform action '{action}' on resource '{resource_info}'"
else:
message = "Insufficient permissions"
return message
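A stand-alone sketch mirroring the message format built above, with hypothetical dataclasses standing in for the real `ProtectedResource` and `User` types:
```python
from dataclasses import dataclass, field


@dataclass
class Resource:
    type: str
    identifier: str


@dataclass
class RequestUser:
    principal: str
    attributes: dict = field(default_factory=dict)


def build_message(action, resource, user) -> str:
    # Same shape as _build_access_denied_message above.
    if action and resource and user:
        resource_info = f"{resource.type}::{resource.identifier}"
        user_info = f"'{user.principal}'"
        if user.attributes:
            attrs = ", ".join(f"{k}={v}" for k, v in user.attributes.items())
            user_info += f" (attributes: {attrs})"
        return f"User {user_info} cannot perform action '{action}' on resource '{resource_info}'"
    return "Insufficient permissions"


print(build_message("delete", Resource("vector_db", "my_db"), RequestUser("alice", {"team": "ml"})))
# User 'alice' (attributes: team=ml) cannot perform action 'delete' on resource 'vector_db::my_db'
```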

View file

@ -17,6 +17,7 @@ from llama_stack.distribution.distribution import (
builtin_automatically_routed_apis,
get_provider_registry,
)
from llama_stack.distribution.stack import replace_env_vars
from llama_stack.distribution.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
from llama_stack.distribution.utils.dynamic import instantiate_class_type
from llama_stack.distribution.utils.prompt_for_config import prompt_for_config
@ -163,7 +164,7 @@ def upgrade_from_routing_table(
def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig:
version = config_dict.get("version", None)
if version == LLAMA_STACK_RUN_CONFIG_VERSION:
return StackRunConfig(**config_dict)
return StackRunConfig(**replace_env_vars(config_dict))
if "routing_table" in config_dict:
logger.info("Upgrading config...")
@ -174,4 +175,4 @@ def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfi
if not config_dict.get("external_providers_dir", None):
config_dict["external_providers_dir"] = EXTERNAL_PROVIDERS_DIR
return StackRunConfig(**config_dict)
return StackRunConfig(**replace_env_vars(config_dict))

View file

@ -175,8 +175,9 @@ class CommonRoutingTableImpl(RoutingTable):
return obj
async def unregister_object(self, obj: RoutableObjectWithProvider) -> None:
if not is_action_allowed(self.policy, "delete", obj, get_authenticated_user()):
raise AccessDeniedError()
user = get_authenticated_user()
if not is_action_allowed(self.policy, "delete", obj, user):
raise AccessDeniedError("delete", obj, user)
await self.dist_registry.delete(obj.type, obj.identifier)
await unregister_object_from_provider(obj, self.impls_by_provider_id[obj.provider_id])
@ -193,7 +194,7 @@ class CommonRoutingTableImpl(RoutingTable):
# If object supports access control but no attributes set, use creator's attributes
creator = get_authenticated_user()
if not is_action_allowed(self.policy, "create", obj, creator):
raise AccessDeniedError()
raise AccessDeniedError("create", obj, creator)
if creator:
obj.owner = creator
logger.info(f"Setting owner for {obj.type} '{obj.identifier}' to {obj.owner.principal}")

View file

@ -9,6 +9,7 @@ import asyncio
import functools
import inspect
import json
import logging
import os
import ssl
import sys
@ -31,6 +32,7 @@ from openai import BadRequestError
from pydantic import BaseModel, ValidationError
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.distribution.access_control.access_control import AccessDeniedError
from llama_stack.distribution.datatypes import AuthenticationRequiredError, LoggingConfig, StackRunConfig
from llama_stack.distribution.distribution import builtin_automatically_routed_apis
from llama_stack.distribution.request_headers import PROVIDER_DATA_VAR, User, request_provider_data_context
@ -116,7 +118,7 @@ def translate_exception(exc: Exception) -> HTTPException | RequestValidationErro
return HTTPException(status_code=400, detail=f"Invalid value: {str(exc)}")
elif isinstance(exc, BadRequestError):
return HTTPException(status_code=400, detail=str(exc))
elif isinstance(exc, PermissionError):
elif isinstance(exc, PermissionError | AccessDeniedError):
return HTTPException(status_code=403, detail=f"Permission denied: {str(exc)}")
elif isinstance(exc, asyncio.TimeoutError | TimeoutError):
return HTTPException(status_code=504, detail=f"Operation timed out: {str(exc)}")
@ -236,7 +238,10 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
result.url = route
return result
except Exception as e:
if logger.isEnabledFor(logging.DEBUG):
logger.exception(f"Error executing endpoint {route=} {method=}")
else:
logger.error(f"Error executing endpoint {route=} {method=}: {str(e)}")
raise translate_exception(e) from e
sig = inspect.signature(func)

View file

@ -10,11 +10,11 @@ from typing import Protocol
import pydantic
from llama_stack.distribution.datatypes import KVStoreConfig, RoutableObjectWithProvider
from llama_stack.distribution.datatypes import RoutableObjectWithProvider
from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
logger = get_logger(__name__, category="core")

View file

@ -53,7 +53,7 @@ class AgentPersistence:
identifier=name, # should this be qualified in any way?
)
if not is_action_allowed(self.policy, "create", session_info, user):
raise AccessDeniedError()
raise AccessDeniedError("create", session_info, user)
await self.kvstore.set(
key=f"session:{self.agent_id}:{session_id}",

View file

@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]:
"pillow",
"pandas",
"scikit-learn",
"mcp",
]
+ kvstore_dependencies(), # TODO make this dynamic based on the kvstore config
module="llama_stack.providers.inline.agents.meta_reference",

View file

@ -36,15 +36,14 @@ class RedisKVStoreConfig(CommonConfig):
def url(self) -> str:
return f"redis://{self.host}:{self.port}"
@property
def pip_packages(self) -> list[str]:
@classmethod
def pip_packages(cls) -> list[str]:
return ["redis"]
@classmethod
def sample_run_config(cls):
return {
"type": "redis",
"namespace": None,
"host": "${env.REDIS_HOST:=localhost}",
"port": "${env.REDIS_PORT:=6379}",
}
@ -57,15 +56,14 @@ class SqliteKVStoreConfig(CommonConfig):
description="File path for the sqlite database",
)
@property
def pip_packages(self) -> list[str]:
@classmethod
def pip_packages(cls) -> list[str]:
return ["aiosqlite"]
@classmethod
def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"):
return {
"type": "sqlite",
"namespace": None,
"db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
}
@ -73,7 +71,7 @@ class SqliteKVStoreConfig(CommonConfig):
class PostgresKVStoreConfig(CommonConfig):
type: Literal[KVStoreType.postgres.value] = KVStoreType.postgres.value
host: str = "localhost"
port: str = "5432"
port: int = 5432
db: str = "llamastack"
user: str
password: str | None = None
@ -83,7 +81,6 @@ class PostgresKVStoreConfig(CommonConfig):
def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs):
return {
"type": "postgres",
"namespace": None,
"host": "${env.POSTGRES_HOST:=localhost}",
"port": "${env.POSTGRES_PORT:=5432}",
"db": "${env.POSTGRES_DB:=llamastack}",
@ -108,8 +105,8 @@ class PostgresKVStoreConfig(CommonConfig):
raise ValueError("Table name must be less than 63 characters")
return v
@property
def pip_packages(self) -> list[str]:
@classmethod
def pip_packages(cls) -> list[str]:
return ["psycopg2-binary"]
@ -122,15 +119,14 @@ class MongoDBKVStoreConfig(CommonConfig):
password: str | None = None
collection_name: str = "llamastack_kvstore"
@property
def pip_packages(self) -> list[str]:
@classmethod
def pip_packages(cls) -> list[str]:
return ["pymongo"]
@classmethod
def sample_run_config(cls, collection_name: str = "llamastack_kvstore"):
return {
"type": "mongodb",
"namespace": None,
"host": "${env.MONGODB_HOST:=localhost}",
"port": "${env.MONGODB_PORT:=5432}",
"db": "${env.MONGODB_DB}",
@ -144,3 +140,21 @@ KVStoreConfig = Annotated[
RedisKVStoreConfig | SqliteKVStoreConfig | PostgresKVStoreConfig | MongoDBKVStoreConfig,
Field(discriminator="type", default=KVStoreType.sqlite.value),
]
def get_pip_packages(store_config: dict | KVStoreConfig) -> list[str]:
"""Get pip packages for KV store config, handling both dict and object cases."""
if isinstance(store_config, dict):
store_type = store_config.get("type")
if store_type == "sqlite":
return SqliteKVStoreConfig.pip_packages()
elif store_type == "postgres":
return PostgresKVStoreConfig.pip_packages()
elif store_type == "redis":
return RedisKVStoreConfig.pip_packages()
elif store_type == "mongodb":
return MongoDBKVStoreConfig.pip_packages()
else:
raise ValueError(f"Unknown KV store type: {store_type}")
else:
return store_config.pip_packages()
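A brief usage sketch, assuming the module path shown in this diff; the dict literals are illustrative:
```python
# The helper accepts either a plain dict (e.g. parsed from a run.yaml) or a typed config object.
from llama_stack.providers.utils.kvstore.config import get_pip_packages

print(get_pip_packages({"type": "sqlite", "db_path": "/tmp/kvstore.db"}))  # ['aiosqlite']
print(get_pip_packages({"type": "postgres"}))                              # ['psycopg2-binary']
print(get_pip_packages({"type": "mongodb"}))                               # ['pymongo']
```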

View file

@ -30,8 +30,8 @@ class SqlAlchemySqlStoreConfig(BaseModel):
def engine_str(self) -> str: ...
# TODO: move this when we have a better way to specify dependencies with internal APIs
@property
def pip_packages(self) -> list[str]:
@classmethod
def pip_packages(cls) -> list[str]:
return ["sqlalchemy[asyncio]"]
@ -48,20 +48,20 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
@classmethod
def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"):
return cls(
type="sqlite",
db_path="${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
)
return {
"type": "sqlite",
"db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
}
@property
def pip_packages(self) -> list[str]:
return super().pip_packages + ["aiosqlite"]
@classmethod
def pip_packages(cls) -> list[str]:
return super().pip_packages() + ["aiosqlite"]
class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
type: Literal["postgres"] = SqlStoreType.postgres.value
host: str = "localhost"
port: str = "5432"
port: int = 5432
db: str = "llamastack"
user: str
password: str | None = None
@ -70,20 +70,20 @@ class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
def engine_str(self) -> str:
return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}"
@property
def pip_packages(self) -> list[str]:
return super().pip_packages + ["asyncpg"]
@classmethod
def pip_packages(cls) -> list[str]:
return super().pip_packages() + ["asyncpg"]
@classmethod
def sample_run_config(cls, **kwargs):
return cls(
type="postgres",
host="${env.POSTGRES_HOST:=localhost}",
port="${env.POSTGRES_PORT:=5432}",
db="${env.POSTGRES_DB:=llamastack}",
user="${env.POSTGRES_USER:=llamastack}",
password="${env.POSTGRES_PASSWORD:=llamastack}",
)
return {
"type": "postgres",
"host": "${env.POSTGRES_HOST:=localhost}",
"port": "${env.POSTGRES_PORT:=5432}",
"db": "${env.POSTGRES_DB:=llamastack}",
"user": "${env.POSTGRES_USER:=llamastack}",
"password": "${env.POSTGRES_PASSWORD:=llamastack}",
}
SqlStoreConfig = Annotated[
@ -92,6 +92,20 @@ SqlStoreConfig = Annotated[
]
def get_pip_packages(store_config: dict | SqlStoreConfig) -> list[str]:
"""Get pip packages for SQL store config, handling both dict and object cases."""
if isinstance(store_config, dict):
store_type = store_config.get("type")
if store_type == "sqlite":
return SqliteSqlStoreConfig.pip_packages()
elif store_type == "postgres":
return PostgresSqlStoreConfig.pip_packages()
else:
raise ValueError(f"Unknown SQL store type: {store_type}")
else:
return store_config.pip_packages()
def sqlstore_impl(config: SqlStoreConfig) -> SqlStore:
if config.type in [SqlStoreType.sqlite.value, SqlStoreType.postgres.value]:
from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl

View file

@ -9,6 +9,11 @@ import uuid
def generate_chunk_id(document_id: str, chunk_text: str) -> str:
"""Generate a unique chunk ID using a hash of document ID and chunk text."""
"""
Generate a unique chunk ID using a hash of the document ID and chunk text.
Note: MD5 is used only to calculate an identifier, not for security purposes.
Adding usedforsecurity=False for compatibility with FIPS environments.
"""
hash_input = f"{document_id}:{chunk_text}".encode()
return str(uuid.UUID(hashlib.md5(hash_input).hexdigest()))
return str(uuid.UUID(hashlib.md5(hash_input, usedforsecurity=False).hexdigest()))
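A stand-alone sketch of the same scheme, demonstrating that the ID is deterministic for a given (document ID, chunk text) pair; this mirrors the function above rather than importing it from the repo:
```python
import hashlib
import uuid


def chunk_id(document_id: str, chunk_text: str) -> str:
    # MD5 is used purely as a stable identifier hash, not for security.
    hash_input = f"{document_id}:{chunk_text}".encode()
    return str(uuid.UUID(hashlib.md5(hash_input, usedforsecurity=False).hexdigest()))


# The same inputs always map to the same UUID-formatted ID.
assert chunk_id("document_1", "some text") == chunk_id("document_1", "some text")
print(chunk_id("document_1", "some text"))
```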

View file

@ -21,7 +21,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/faiss_store.db
safety:
- provider_id: bedrock
@ -33,7 +32,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/agents_store.db
responses_store:
type: sqlite
@ -51,7 +49,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -59,14 +56,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -31,7 +31,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/faiss_store.db
agents:
- provider_id: meta-reference
@ -39,7 +38,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/agents_store.db
responses_store:
type: sqlite
@ -50,7 +48,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -58,14 +55,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -36,7 +36,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db
responses_store:
type: sqlite
@ -54,7 +53,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -62,14 +60,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -39,7 +39,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db
responses_store:
type: sqlite
@ -57,7 +56,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -65,14 +63,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -35,7 +35,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db
responses_store:
type: sqlite
@ -53,7 +52,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -61,14 +59,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -27,7 +27,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db
safety:
- provider_id: llama-guard
@ -45,7 +44,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db
responses_store:
type: sqlite
@ -63,7 +61,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -71,14 +68,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -27,7 +27,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db
safety:
- provider_id: llama-guard
@ -40,7 +39,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db
responses_store:
type: sqlite
@ -58,7 +56,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -66,14 +63,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/faiss_store.db
safety:
- provider_id: llama-guard
@ -39,7 +38,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/agents_store.db
responses_store:
type: sqlite
@ -57,7 +55,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -65,14 +62,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -31,7 +31,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db
safety:
- provider_id: llama-guard
@ -44,7 +43,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db
responses_store:
type: sqlite
@ -62,7 +60,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -70,14 +67,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db
safety:
- provider_id: llama-guard
@ -39,7 +38,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db
responses_store:
type: sqlite
@ -57,7 +55,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -65,14 +62,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -31,7 +31,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db
safety:
- provider_id: llama-guard
@ -44,7 +43,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db
responses_store:
type: sqlite
@ -62,7 +60,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -70,14 +67,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db
safety:
- provider_id: llama-guard
@ -39,7 +38,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db
responses_store:
type: sqlite
@ -57,7 +55,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -65,14 +62,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -48,7 +48,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/agents_store.db
responses_store:
type: sqlite
@ -66,7 +65,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -74,14 +72,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -41,7 +41,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db
safety:
- provider_id: llama-guard
@ -54,7 +53,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db
responses_store:
type: sqlite
@ -72,7 +70,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -80,14 +77,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -31,7 +31,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db
safety:
- provider_id: llama-guard
@ -44,7 +43,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db
responses_store:
type: sqlite
@ -62,7 +60,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -70,14 +67,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -30,7 +30,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db
safety:
- provider_id: nvidia
@ -44,7 +43,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db
responses_store:
type: sqlite
@ -75,7 +73,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db
- provider_id: nvidia
provider_type: remote::nvidia

View file

@ -25,7 +25,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db
safety:
- provider_id: nvidia
@ -39,7 +38,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db
responses_store:
type: sqlite

View file

@ -25,7 +25,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db
safety:
- provider_id: llama-guard
@ -40,7 +39,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
responses_store:
type: sqlite
@ -58,7 +56,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -66,14 +63,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -25,7 +25,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db
safety:
- provider_id: llama-guard
@ -38,7 +37,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
responses_store:
type: sqlite
@ -56,7 +54,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -64,14 +61,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -62,7 +62,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db
responses_store:
type: sqlite
@ -80,7 +79,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -88,14 +86,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db
safety:
- provider_id: llama-guard
@ -44,7 +43,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db
responses_store:
type: sqlite
@ -62,7 +60,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -70,14 +67,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db
safety:
- provider_id: llama-guard
@ -39,7 +38,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db
responses_store:
type: sqlite
@ -57,7 +55,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -65,14 +62,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -114,7 +114,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_id="meta-reference",
provider_type="inline::meta-reference",
config=dict(
service_name="${env.OTEL_SERVICE_NAME:=}",
service_name="${env.OTEL_SERVICE_NAME:=\u200b}",
sinks="${env.TELEMETRY_SINKS:=console,otel_trace}",
otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}",
),

View file

@ -51,7 +51,7 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:=}
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console,otel_trace}
otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}
tool_runtime:

View file

@ -35,7 +35,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db
safety:
- provider_id: llama-guard
@ -48,7 +47,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db
responses_store:
type: sqlite
@ -59,7 +57,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -67,14 +64,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -28,7 +28,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db
safety:
- provider_id: llama-guard
@ -41,7 +40,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db
responses_store:
type: sqlite
@ -52,7 +50,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -60,14 +57,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -23,7 +23,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/faiss_store.db
- provider_id: ${env.ENABLE_CHROMADB:+chromadb}
provider_type: remote::chromadb
@ -49,7 +48,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/agents_store.db
responses_store:
type: sqlite

View file

@ -66,7 +66,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
- provider_id: ${env.ENABLE_SQLITE_VEC:+sqlite-vec}
provider_type: inline::sqlite-vec
@ -78,7 +77,6 @@ providers:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db
- provider_id: ${env.ENABLE_CHROMADB:+chromadb}
provider_type: remote::chromadb
@ -111,7 +109,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db
responses_store:
type: sqlite
@ -129,7 +126,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -137,14 +133,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -234,7 +234,6 @@ def get_distribution_template() -> DistributionTemplate:
default_models = get_model_registry(available_models)
postgres_store = PostgresSqlStoreConfig.sample_run_config()
return DistributionTemplate(
name=name,
distro_type="self_hosted",
@ -243,7 +242,7 @@ def get_distribution_template() -> DistributionTemplate:
template_path=None,
providers=providers,
available_models_by_provider=available_models,
additional_pip_packages=postgres_store.pip_packages,
additional_pip_packages=PostgresSqlStoreConfig.pip_packages(),
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={

View file

@ -15,6 +15,7 @@ from pydantic import BaseModel, Field
from llama_stack.apis.datasets import DatasetPurpose
from llama_stack.apis.models import ModelType
from llama_stack.distribution.datatypes import (
LLAMA_STACK_RUN_CONFIG_VERSION,
Api,
BenchmarkInput,
BuildConfig,
@ -23,14 +24,15 @@ from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
StackRunConfig,
ToolGroupInput,
)
from llama_stack.distribution.distribution import get_provider_registry
from llama_stack.distribution.utils.dynamic import instantiate_class_type
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore.config import get_pip_packages as get_kv_pip_packages
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages
def get_model_registry(
@ -87,21 +89,24 @@ class RunConfigSettings(BaseModel):
default_tool_groups: list[ToolGroupInput] | None = None
default_datasets: list[DatasetInput] | None = None
default_benchmarks: list[BenchmarkInput] | None = None
metadata_store: KVStoreConfig | None = None
inference_store: SqlStoreConfig | None = None
metadata_store: dict | None = None
inference_store: dict | None = None
def run_config(
self,
name: str,
providers: dict[str, list[str]],
container_image: str | None = None,
) -> StackRunConfig:
) -> dict:
provider_registry = get_provider_registry()
provider_configs = {}
for api_str, provider_types in providers.items():
if api_providers := self.provider_overrides.get(api_str):
provider_configs[api_str] = api_providers
# Convert Provider objects to dicts for YAML serialization
provider_configs[api_str] = [
p.model_dump(exclude_none=True) if isinstance(p, Provider) else p for p in api_providers
]
continue
provider_configs[api_str] = []
@ -128,33 +133,40 @@ class RunConfigSettings(BaseModel):
provider_id=provider_id,
provider_type=provider_type,
config=config,
)
).model_dump(exclude_none=True)
)
# Get unique set of APIs from providers
apis = sorted(providers.keys())
return StackRunConfig(
image_name=name,
container_image=container_image,
apis=apis,
providers=provider_configs,
metadata_store=self.metadata_store
# Return a dict that matches StackRunConfig structure
return {
"version": LLAMA_STACK_RUN_CONFIG_VERSION,
"image_name": name,
"container_image": container_image,
"apis": apis,
"providers": provider_configs,
"metadata_store": self.metadata_store
or SqliteKVStoreConfig.sample_run_config(
__distro_dir__=f"~/.llama/distributions/{name}",
db_name="registry.db",
),
inference_store=self.inference_store
"inference_store": self.inference_store
or SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=f"~/.llama/distributions/{name}",
db_name="inference_store.db",
),
models=self.default_models or [],
shields=self.default_shields or [],
tool_groups=self.default_tool_groups or [],
datasets=self.default_datasets or [],
benchmarks=self.default_benchmarks or [],
)
"models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])],
"shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])],
"vector_dbs": [],
"datasets": [d.model_dump(exclude_none=True) for d in (self.default_datasets or [])],
"scoring_fns": [],
"benchmarks": [b.model_dump(exclude_none=True) for b in (self.default_benchmarks or [])],
"tool_groups": [t.model_dump(exclude_none=True) for t in (self.default_tool_groups or [])],
"server": {
"port": 8321,
},
}
class DistributionTemplate(BaseModel):
@ -190,10 +202,12 @@ class DistributionTemplate(BaseModel):
# TODO: This is a hack to get the dependencies for internal APIs into build
# We should have a better way to do this by formalizing the concept of "internal" APIs
# and providers, with a way to specify dependencies for them.
if run_config_.inference_store:
additional_pip_packages.extend(run_config_.inference_store.pip_packages)
if run_config_.metadata_store:
additional_pip_packages.extend(run_config_.metadata_store.pip_packages)
if run_config_.get("inference_store"):
additional_pip_packages.extend(get_sql_pip_packages(run_config_["inference_store"]))
if run_config_.get("metadata_store"):
additional_pip_packages.extend(get_kv_pip_packages(run_config_["metadata_store"]))
if self.additional_pip_packages:
additional_pip_packages.extend(self.additional_pip_packages)
@ -286,7 +300,7 @@ class DistributionTemplate(BaseModel):
run_config = settings.run_config(self.name, self.providers, self.container_image)
with open(yaml_output_dir / yaml_pth, "w") as f:
yaml.safe_dump(
run_config.model_dump(exclude_none=True),
{k: v for k, v in run_config.items() if v is not None},
f,
sort_keys=False,
)

View file

@ -26,7 +26,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db
safety:
- provider_id: llama-guard
@ -39,7 +38,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db
responses_store:
type: sqlite
@ -57,7 +55,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -65,14 +62,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -25,7 +25,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db
safety:
- provider_id: llama-guard
@ -38,7 +37,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db
responses_store:
type: sqlite
@ -56,7 +54,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -64,14 +61,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db
safety:
- provider_id: llama-guard
@ -44,7 +43,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db
responses_store:
type: sqlite
@ -62,7 +60,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -70,14 +67,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db
safety:
- provider_id: llama-guard
@ -39,7 +38,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db
responses_store:
type: sqlite
@ -57,7 +55,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -65,14 +62,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -30,7 +30,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/faiss_store.db
safety:
- provider_id: llama-guard
@ -43,7 +42,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/agents_store.db
responses_store:
type: sqlite
@ -61,7 +59,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -69,14 +66,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -27,7 +27,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db
safety:
- provider_id: llama-guard
@ -40,7 +39,6 @@ providers:
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db
responses_store:
type: sqlite
@ -58,7 +56,6 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
@ -66,14 +63,12 @@ providers:
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db
scoring:
- provider_id: basic

View file

@ -9,7 +9,9 @@ pytest --help
```
Here are the most important options:
- `--stack-config`: specify the stack config to use. You have three ways to point to a stack:
- `--stack-config`: specify the stack config to use. You have four ways to point to a stack:
- **`server:<config>`** - automatically start a server with the given config (e.g., `server:fireworks`). This provides one-step testing by auto-starting the server if the port is available, or reusing an existing server if already running.
- **`server:<config>:<port>`** - same as above but with a custom port (e.g., `server:together:8322`)
- a URL which points to a Llama Stack distribution server
- a template (e.g., `fireworks`, `together`) or a path to a `run.yaml` file
- a comma-separated list of api=provider pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface.
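For the ad-hoc form, a minimal sketch (assuming the provider names listed above and the `--text-model` flag used in the examples below) could look like:
```bash
# Ad-hoc stack: bind each API surface to a specific provider, no run.yaml needed
pytest -s -v tests/integration/inference/ \
  --stack-config=inference=fireworks,safety=llama-guard,agents=meta-reference \
  --text-model=meta-llama/Llama-3.1-8B-Instruct
```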
@ -26,12 +28,39 @@ Model parameters can be influenced by the following options:
Each of these is a comma-separated list and can be used to generate multiple parameter combinations. Note that tests will be skipped
if no model is specified.
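As an illustration (a sketch assuming the `--text-model` option shown in the examples below), a comma-separated value produces one test parameterization per model:
```bash
# Each selected test runs once per model in the list
pytest -s -v tests/integration/inference/test_text_inference.py \
  --stack-config=fireworks \
  --text-model=meta-llama/Llama-3.1-8B-Instruct,meta-llama/Llama-3.2-3B-Instruct
```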
Experimental options (under development):
- `--record-responses`: record new API responses instead of using cached ones
## Examples
### Testing against a Server
Run all text inference tests by auto-starting a server with the `fireworks` config:
```bash
pytest -s -v tests/integration/inference/test_text_inference.py \
--stack-config=server:fireworks \
--text-model=meta-llama/Llama-3.1-8B-Instruct
```
Run tests with auto-server startup on a custom port:
```bash
pytest -s -v tests/integration/inference/ \
--stack-config=server:together:8322 \
--text-model=meta-llama/Llama-3.1-8B-Instruct
```
Run multiple test suites with an auto-started server (no manual server management needed):
```bash
# Auto-start server and run all integration tests
export FIREWORKS_API_KEY=<your_key>
pytest -s -v tests/integration/inference/ tests/integration/safety/ tests/integration/agents/ \
--stack-config=server:fireworks \
--text-model=meta-llama/Llama-3.1-8B-Instruct
```
### Testing with Library Client
Run all text inference tests with the `together` distribution:
```bash

View file

@ -6,9 +6,13 @@
import inspect
import os
import socket
import subprocess
import tempfile
import time
import pytest
import requests
import yaml
from llama_stack_client import LlamaStackClient
from openai import OpenAI
@ -17,6 +21,60 @@ from llama_stack import LlamaStackAsLibraryClient
from llama_stack.distribution.stack import run_config_from_adhoc_config_spec
from llama_stack.env import get_env_or_fail
DEFAULT_PORT = 8321
def is_port_available(port: int, host: str = "localhost") -> bool:
"""Check if a port is available for binding."""
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
sock.bind((host, port))
return True
except OSError:
return False
def start_llama_stack_server(config_name: str) -> subprocess.Popen:
"""Start a llama stack server with the given config."""
cmd = ["llama", "stack", "run", config_name]
devnull = open(os.devnull, "w")
process = subprocess.Popen(
cmd,
stdout=devnull, # redirect stdout to devnull to prevent deadlock
stderr=devnull, # redirect stderr to devnull to prevent deadlock
text=True,
env={**os.environ, "LLAMA_STACK_LOG_FILE": "server.log"},
)
return process
def wait_for_server_ready(base_url: str, timeout: int = 30, process: subprocess.Popen | None = None) -> bool:
"""Wait for the server to be ready by polling the health endpoint."""
health_url = f"{base_url}/v1/health"
start_time = time.time()
while time.time() - start_time < timeout:
if process and process.poll() is not None:
print(f"Server process terminated with return code: {process.returncode}")
return False
try:
response = requests.get(health_url, timeout=5)
if response.status_code == 200:
return True
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
pass
# Print progress every 5 seconds
elapsed = time.time() - start_time
if int(elapsed) % 5 == 0 and elapsed > 0:
print(f"Waiting for server at {base_url}... ({elapsed:.1f}s elapsed)")
time.sleep(0.5)
print(f"Server failed to respond within {timeout} seconds")
return False
@pytest.fixture(scope="session")
def provider_data():
@ -122,6 +180,41 @@ def llama_stack_client(request, provider_data):
if not config:
raise ValueError("You must specify either --stack-config or LLAMA_STACK_CONFIG")
# Handle server:<config_name> format or server:<config_name>:<port>
if config.startswith("server:"):
parts = config.split(":")
config_name = parts[1]
port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
base_url = f"http://localhost:{port}"
# Check if port is available
if is_port_available(port):
print(f"Starting llama stack server with config '{config_name}' on port {port}...")
# Start server
server_process = start_llama_stack_server(config_name)
# Wait for server to be ready
if not wait_for_server_ready(base_url, timeout=30, process=server_process):
print("Server failed to start within timeout")
server_process.terminate()
raise RuntimeError(
f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid. "
f"See server.log for details."
)
print(f"Server is ready at {base_url}")
# Store process for potential cleanup (pytest will handle termination at session end)
request.session._llama_stack_server_process = server_process
else:
print(f"Port {port} is already in use, assuming server is already running...")
return LlamaStackClient(
base_url=base_url,
provider_data=provider_data,
)
# check if this looks like a URL
if config.startswith("http") or "//" in config:
return LlamaStackClient(
@ -151,3 +244,31 @@ def llama_stack_client(request, provider_data):
def openai_client(client_with_models):
base_url = f"{client_with_models.base_url}/v1/openai/v1"
return OpenAI(base_url=base_url, api_key="fake")
@pytest.fixture(scope="session", autouse=True)
def cleanup_server_process(request):
"""Cleanup server process at the end of the test session."""
yield # Run tests
if hasattr(request.session, "_llama_stack_server_process"):
server_process = request.session._llama_stack_server_process
if server_process:
if server_process.poll() is None:
print("Terminating llama stack server process...")
else:
print(f"Server process already terminated with return code: {server_process.returncode}")
return
try:
server_process.terminate()
server_process.wait(timeout=10)
print("Server process terminated gracefully")
except subprocess.TimeoutExpired:
print("Server process did not terminate gracefully, killing it")
server_process.kill()
server_process.wait()
print("Server process killed")
except Exception as e:
print(f"Error during server cleanup: {e}")
else:
print("Server process not found - won't be able to cleanup")

View file

@ -4,14 +4,14 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import pytest
import pytest_asyncio
from llama_stack.distribution.store.registry import CachedDiskDistributionRegistry, DiskDistributionRegistry
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore.sqlite import SqliteKVStoreImpl
@pytest.fixture(scope="function")
@pytest_asyncio.fixture(scope="function")
async def sqlite_kvstore(tmp_path):
db_path = tmp_path / "test_kv.db"
kvstore_config = SqliteKVStoreConfig(db_path=db_path.as_posix())
@ -20,14 +20,14 @@ async def sqlite_kvstore(tmp_path):
yield kvstore
@pytest.fixture(scope="function")
@pytest_asyncio.fixture(scope="function")
async def disk_dist_registry(sqlite_kvstore):
registry = DiskDistributionRegistry(sqlite_kvstore)
await registry.initialize()
yield registry
@pytest.fixture(scope="function")
@pytest_asyncio.fixture(scope="function")
async def cached_disk_dist_registry(sqlite_kvstore):
registry = CachedDiskDistributionRegistry(sqlite_kvstore)
await registry.initialize()

View file

@ -9,6 +9,7 @@ from datetime import datetime
from unittest.mock import patch
import pytest
import pytest_asyncio
from llama_stack.apis.agents import Turn
from llama_stack.apis.inference import CompletionMessage, StopReason
@ -16,7 +17,7 @@ from llama_stack.distribution.datatypes import User
from llama_stack.providers.inline.agents.meta_reference.persistence import AgentPersistence, AgentSessionInfo
@pytest.fixture
@pytest_asyncio.fixture
async def test_setup(sqlite_kvstore):
agent_persistence = AgentPersistence(agent_id="test_agent", kvstore=sqlite_kvstore, policy={})
yield agent_persistence

View file

@ -148,7 +148,7 @@ async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks, embedding_dime
assert len(chunk_ids) == len(set(chunk_ids)), "Duplicate chunk IDs detected across batches!"
@pytest.fixture(scope="session")
@pytest_asyncio.fixture(scope="session")
async def sqlite_vec_adapter(sqlite_connection):
config = type("Config", (object,), {"db_path": ":memory:"}) # Mock config with in-memory database
adapter = SQLiteVecVectorIOAdapter(config=config, inference_api=None)

View file

@ -7,6 +7,7 @@
from unittest.mock import MagicMock, Mock, patch
import pytest
import pytest_asyncio
import yaml
from pydantic import TypeAdapter, ValidationError
@ -26,7 +27,7 @@ def _return_model(model):
return model
@pytest.fixture
@pytest_asyncio.fixture
async def test_setup(cached_disk_dist_registry):
mock_inference = Mock()
mock_inference.__provider_spec__ = MagicMock()
@ -245,7 +246,7 @@ async def test_automatic_access_attributes(mock_get_authenticated_user, test_set
assert model.identifier == "auto-access-model"
@pytest.fixture
@pytest_asyncio.fixture
async def test_setup_with_access_policy(cached_disk_dist_registry):
mock_inference = Mock()
mock_inference.__provider_spec__ = MagicMock()

View file

@ -0,0 +1,187 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from unittest.mock import Mock
from fastapi import HTTPException
from openai import BadRequestError
from pydantic import ValidationError
from llama_stack.distribution.access_control.access_control import AccessDeniedError
from llama_stack.distribution.datatypes import AuthenticationRequiredError
from llama_stack.distribution.server.server import translate_exception
class TestTranslateException:
"""Test cases for the translate_exception function."""
def test_translate_access_denied_error(self):
"""Test that AccessDeniedError is translated to 403 HTTP status."""
exc = AccessDeniedError()
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 403
assert result.detail == "Permission denied: Insufficient permissions"
def test_translate_access_denied_error_with_context(self):
"""Test that AccessDeniedError with context includes detailed information."""
from llama_stack.distribution.datatypes import User
# Create mock user and resource
user = User("test-user", {"roles": ["user"], "teams": ["dev"]})
# Create a simple mock object that implements the ProtectedResource protocol
class MockResource:
def __init__(self, type: str, identifier: str, owner=None):
self.type = type
self.identifier = identifier
self.owner = owner
resource = MockResource("vector_db", "test-db")
exc = AccessDeniedError("create", resource, user)
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 403
assert "test-user" in result.detail
assert "vector_db::test-db" in result.detail
assert "create" in result.detail
assert "roles=['user']" in result.detail
assert "teams=['dev']" in result.detail
def test_translate_permission_error(self):
"""Test that PermissionError is translated to 403 HTTP status."""
exc = PermissionError("Permission denied")
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 403
assert result.detail == "Permission denied: Permission denied"
def test_translate_value_error(self):
"""Test that ValueError is translated to 400 HTTP status."""
exc = ValueError("Invalid input")
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 400
assert result.detail == "Invalid value: Invalid input"
def test_translate_bad_request_error(self):
"""Test that BadRequestError is translated to 400 HTTP status."""
# Create a mock response for BadRequestError
mock_response = Mock()
mock_response.status_code = 400
mock_response.headers = {}
exc = BadRequestError("Bad request", response=mock_response, body="Bad request")
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 400
assert result.detail == "Bad request"
def test_translate_authentication_required_error(self):
"""Test that AuthenticationRequiredError is translated to 401 HTTP status."""
exc = AuthenticationRequiredError("Authentication required")
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 401
assert result.detail == "Authentication required: Authentication required"
def test_translate_timeout_error(self):
"""Test that TimeoutError is translated to 504 HTTP status."""
exc = TimeoutError("Operation timed out")
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 504
assert result.detail == "Operation timed out: Operation timed out"
def test_translate_asyncio_timeout_error(self):
"""Test that asyncio.TimeoutError is translated to 504 HTTP status."""
exc = TimeoutError()
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 504
assert result.detail == "Operation timed out: "
def test_translate_not_implemented_error(self):
"""Test that NotImplementedError is translated to 501 HTTP status."""
exc = NotImplementedError("Not implemented")
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 501
assert result.detail == "Not implemented: Not implemented"
def test_translate_validation_error(self):
"""Test that ValidationError is translated to 400 HTTP status with proper format."""
# Create a mock validation error using proper Pydantic error format
exc = ValidationError.from_exception_data(
"TestModel",
[
{
"loc": ("field", "nested"),
"msg": "field required",
"type": "missing",
}
],
)
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 400
assert "errors" in result.detail
assert len(result.detail["errors"]) == 1
assert result.detail["errors"][0]["loc"] == ["field", "nested"]
assert result.detail["errors"][0]["msg"] == "Field required"
assert result.detail["errors"][0]["type"] == "missing"
def test_translate_generic_exception(self):
"""Test that generic exceptions are translated to 500 HTTP status."""
exc = Exception("Unexpected error")
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 500
assert result.detail == "Internal server error: An unexpected error occurred."
def test_translate_runtime_error(self):
"""Test that RuntimeError is translated to 500 HTTP status."""
exc = RuntimeError("Runtime error")
result = translate_exception(exc)
assert isinstance(result, HTTPException)
assert result.status_code == 500
assert result.detail == "Internal server error: An unexpected error occurred."
def test_multiple_access_denied_scenarios(self):
"""Test various scenarios that should result in 403 status codes."""
# Test AccessDeniedError (uses enhanced message)
exc1 = AccessDeniedError()
result1 = translate_exception(exc1)
assert isinstance(result1, HTTPException)
assert result1.status_code == 403
assert result1.detail == "Permission denied: Insufficient permissions"
# Test PermissionError (uses generic message)
exc2 = PermissionError("No permission")
result2 = translate_exception(exc2)
assert isinstance(result2, HTTPException)
assert result2.status_code == 403
assert result2.detail == "Permission denied: No permission"
exc3 = PermissionError("Access denied")
result3 = translate_exception(exc3)
assert isinstance(result3, HTTPException)
assert result3.status_code == 403
assert result3.detail == "Permission denied: Access denied"