Merge branch 'main' into add-llama-guard-4-model

This commit is contained in:
raghotham 2025-07-03 10:52:01 -07:00 committed by GitHub
commit bae3c766bc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
72 changed files with 990 additions and 337 deletions

View file

@ -25,7 +25,7 @@ jobs:
# Listing tests manually since some of them currently fail # Listing tests manually since some of them currently fail
# TODO: generate matrix list from tests/integration when fixed # TODO: generate matrix list from tests/integration when fixed
test-type: [agents, inference, datasets, inspect, scoring, post_training, providers, tool_runtime, vector_io] test-type: [agents, inference, datasets, inspect, scoring, post_training, providers, tool_runtime, vector_io]
client-type: [library, http] client-type: [library, server]
python-version: ["3.12", "3.13"] python-version: ["3.12", "3.13"]
fail-fast: false # we want to run all tests regardless of failure fail-fast: false # we want to run all tests regardless of failure
@ -45,39 +45,6 @@ jobs:
run: | run: |
uv run llama stack build --template ollama --image-type venv uv run llama stack build --template ollama --image-type venv
- name: Start Llama Stack server in background
if: matrix.client-type == 'http'
env:
INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
run: |
LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv --env OLLAMA_URL="http://0.0.0.0:11434" &
- name: Wait for Llama Stack server to be ready
if: matrix.client-type == 'http'
run: |
echo "Waiting for Llama Stack server..."
for i in {1..30}; do
if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
echo "Llama Stack server is up!"
exit 0
fi
sleep 1
done
echo "Llama Stack server failed to start"
cat server.log
exit 1
- name: Verify Ollama status is OK
if: matrix.client-type == 'http'
run: |
echo "Verifying Ollama status..."
ollama_status=$(curl -s -L http://127.0.0.1:8321/v1/providers/ollama|jq --raw-output .health.status)
echo "Ollama status: $ollama_status"
if [ "$ollama_status" != "OK" ]; then
echo "Ollama health check failed"
exit 1
fi
- name: Check Storage and Memory Available Before Tests - name: Check Storage and Memory Available Before Tests
if: ${{ always() }} if: ${{ always() }}
run: | run: |
@ -92,12 +59,14 @@ jobs:
if [ "${{ matrix.client-type }}" == "library" ]; then if [ "${{ matrix.client-type }}" == "library" ]; then
stack_config="ollama" stack_config="ollama"
else else
stack_config="http://localhost:8321" stack_config="server:ollama"
fi fi
uv run pytest -s -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \ uv run pytest -s -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \
-k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \ -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
--text-model="meta-llama/Llama-3.2-3B-Instruct" \ --text-model="meta-llama/Llama-3.2-3B-Instruct" \
--embedding-model=all-MiniLM-L6-v2 --embedding-model=all-MiniLM-L6-v2 \
--color=yes \
--capture=tee-sys | tee pytest-${{ matrix.test-type }}.log
- name: Check Storage and Memory Available After Tests - name: Check Storage and Memory Available After Tests
if: ${{ always() }} if: ${{ always() }}

View file

@ -35,6 +35,8 @@ pip install llama-stack-client
### CLI ### CLI
```bash ```bash
# Run a chat completion # Run a chat completion
MODEL="Llama-4-Scout-17B-16E-Instruct"
llama-stack-client --endpoint http://localhost:8321 \ llama-stack-client --endpoint http://localhost:8321 \
inference chat-completion \ inference chat-completion \
--model-id meta-llama/$MODEL \ --model-id meta-llama/$MODEL \
@ -106,46 +108,59 @@ By reducing friction and complexity, Llama Stack empowers developers to focus on
### API Providers ### API Providers
Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack. Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack.
Please check out the [full list](https://llama-stack.readthedocs.io/en/latest/providers/index.html).
| **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** | **Post Training** | | API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Telemetry | Post Training | Eval | DatasetIO |
|:------------------------:|:----------------------:|:----------:|:-------------:|:----------:|:----------:|:-------------:|:-----------------:| |:-------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:--------:|
| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | | | Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| SambaNova | Hosted | | ✅ | | ✅ | | | | SambaNova | Hosted | | ✅ | | ✅ | | | | |
| Cerebras | Hosted | | ✅ | | | | | | Cerebras | Hosted | | ✅ | | | | | | |
| Fireworks | Hosted | ✅ | ✅ | ✅ | | | | | Fireworks | Hosted | ✅ | ✅ | ✅ | | | | | |
| AWS Bedrock | Hosted | | ✅ | | ✅ | | | | AWS Bedrock | Hosted | | ✅ | | ✅ | | | | |
| Together | Hosted | ✅ | ✅ | | ✅ | | | | Together | Hosted | ✅ | ✅ | | ✅ | | | | |
| Groq | Hosted | | ✅ | | | | | | Groq | Hosted | | ✅ | | | | | | |
| Ollama | Single Node | | ✅ | | | | | | Ollama | Single Node | | ✅ | | | | | | |
| TGI | Hosted and Single Node | | ✅ | | | | | | TGI | Hosted/Single Node | | ✅ | | | | | | |
| NVIDIA NIM | Hosted and Single Node | | ✅ | | | | | | NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | | |
| Chroma | Single Node | | | ✅ | | | | | ChromaDB | Hosted/Single Node | | | ✅ | | | | | |
| PG Vector | Single Node | | | ✅ | | | | | PG Vector | Single Node | | | ✅ | | | | | |
| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | | PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | | |
| vLLM | Hosted and Single Node | | ✅ | | | | | | vLLM | Single Node | | ✅ | | | | | | |
| OpenAI | Hosted | | ✅ | | | | | | OpenAI | Hosted | | ✅ | | | | | | |
| Anthropic | Hosted | | ✅ | | | | | | Anthropic | Hosted | | ✅ | | | | | | |
| Gemini | Hosted | | ✅ | | | | | | Gemini | Hosted | | ✅ | | | | | | |
| watsonx | Hosted | | ✅ | | | | | | WatsonX | Hosted | | ✅ | | | | | | |
| HuggingFace | Single Node | | | | | | ✅ | | HuggingFace | Single Node | | | | | | ✅ | | ✅ |
| TorchTune | Single Node | | | | | | ✅ | | TorchTune | Single Node | | | | | | ✅ | | |
| NVIDIA NEMO | Hosted | | | | | | ✅ | | NVIDIA NEMO | Hosted | | ✅ | ✅ | | | ✅ | ✅ | ✅ |
| NVIDIA | Hosted | | | | | | ✅ | ✅ | ✅ |
> **Note**: Additional providers are available through external packages. See [External Providers](https://llama-stack.readthedocs.io/en/latest/providers/external.html) documentation.
### Distributions ### Distributions
A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (eg. ollama) and seamlessly transition to production (eg. Fireworks) without changing your application code. Here are some of the distributions we support: A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (eg. ollama) and seamlessly transition to production (eg. Fireworks) without changing your application code.
Here are some of the distributions we support:
| **Distribution** | **Llama Stack Docker** | Start This Distribution | | **Distribution** | **Llama Stack Docker** | Start This Distribution |
|:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:| |:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:|
| Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-gpu.html) | | Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-gpu.html) |
| SambaNova | [llamastack/distribution-sambanova](https://hub.docker.com/repository/docker/llamastack/distribution-sambanova/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/sambanova.html) | | TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/tgi.html) |
| Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) | | vLLM | [llamastack/distribution-remote-vllm](https://hub.docker.com/repository/docker/llamastack/distribution-remote-vllm/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/remote-vllm.html) |
| Starter | [llamastack/distribution-starter](https://hub.docker.com/repository/docker/llamastack/distribution-starter/general) | |
| PostgreSQL | [llamastack/distribution-postgres-demo](https://hub.docker.com/repository/docker/llamastack/distribution-postgres-demo/general) | |
Here are the ones out of support scope but still available from Docker Hub:
| **Distribution** | **Llama Stack Docker** | Start This Distribution |
|:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:|
| Ollama | [llamastack/distribution-ollama](https://hub.docker.com/repository/docker/llamastack/distribution-ollama/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/ollama.html) | | Ollama | [llamastack/distribution-ollama](https://hub.docker.com/repository/docker/llamastack/distribution-ollama/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/ollama.html) |
| TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/tgi.html) |
| Together | [llamastack/distribution-together](https://hub.docker.com/repository/docker/llamastack/distribution-together/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/together.html) | | Together | [llamastack/distribution-together](https://hub.docker.com/repository/docker/llamastack/distribution-together/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/together.html) |
| Fireworks | [llamastack/distribution-fireworks](https://hub.docker.com/repository/docker/llamastack/distribution-fireworks/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/fireworks.html) | | Fireworks | [llamastack/distribution-fireworks](https://hub.docker.com/repository/docker/llamastack/distribution-fireworks/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/fireworks.html) |
| vLLM | [llamastack/distribution-remote-vllm](https://hub.docker.com/repository/docker/llamastack/distribution-remote-vllm/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/remote-vllm.html) | | AWS Bedrock | [llamastack/distribution-bedrock](https://hub.docker.com/repository/docker/llamastack/distribution-bedrock/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/bedrock.html) |
| SambaNova | [llamastack/distribution-sambanova](https://hub.docker.com/repository/docker/llamastack/distribution-sambanova/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/sambanova.html) |
| Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) | | | |
### Documentation ### Documentation

View file

@ -17,7 +17,9 @@
"\n", "\n",
"Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n", "Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n",
"\n", "\n",
"In this guide, we will showcase how you can build LLM-powered agentic applications using Llama Stack.\n" "In this guide, we will showcase how you can build LLM-powered agentic applications using Llama Stack.\n",
"\n",
"**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. It provides a streamlined experience for getting up and running in just a few steps.\n"
] ]
}, },
{ {

View file

@ -17,7 +17,9 @@
"\n", "\n",
"Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n", "Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n",
"\n", "\n",
"In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n" "In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n",
"\n",
"**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. It provides a streamlined experience for getting up and running in just a few steps.\n"
] ]
}, },
{ {

View file

@ -17,7 +17,9 @@
"\n", "\n",
"Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n", "Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n",
"\n", "\n",
"In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n" "In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n",
"\n",
"**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. It provides a streamlined experience for getting up and running in just a few steps.\n"
] ]
}, },
{ {

367
docs/quick_start.ipynb Normal file
View file

@ -0,0 +1,367 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "c1e7571c",
"metadata": {
"id": "c1e7571c"
},
"source": [
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/meta-llama/llama-stack/blob/main/docs/quick_start.ipynb)\n",
"\n",
"# Llama Stack - Building AI Applications\n",
"\n",
"<img src=\"https://llama-stack.readthedocs.io/en/latest/_images/llama-stack.png\" alt=\"drawing\" width=\"500\"/>\n",
"\n",
"Get started with Llama Stack in minutes!\n",
"\n",
"[Llama Stack](https://github.com/meta-llama/llama-stack) is a stateful service with REST APIs to support the seamless transition of AI applications across different environments. You can build and test using a local server first and deploy to a hosted endpoint for production.\n",
"\n",
"In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/)\n",
"as the inference [provider](docs/source/providers/index.md#inference) for a Llama Model.\n"
]
},
{
"cell_type": "markdown",
"id": "4CV1Q19BDMVw",
"metadata": {
"id": "4CV1Q19BDMVw"
},
"source": [
"## Step 1: Install and setup"
]
},
{
"cell_type": "markdown",
"id": "K4AvfUAJZOeS",
"metadata": {
"id": "K4AvfUAJZOeS"
},
"source": [
"### 1.1. Install uv and Ollama\n",
"\n",
"We'll install [uv](https://docs.astral.sh/uv/) to setup the Python virtual environment, along with [colab-xterm](https://github.com/InfuseAI/colab-xterm) for running command-line tools, and [Ollama](https://ollama.com/download) as the inference provider."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a2d7b85",
"metadata": {},
"outputs": [],
"source": [
"%pip install uv llama_stack llama-stack-client\n",
"\n",
"## If running on Colab:\n",
"# !pip install colab-xterm\n",
"# %load_ext colabxterm\n",
"\n",
"!curl https://ollama.ai/install.sh | sh"
]
},
{
"cell_type": "markdown",
"id": "39fa584b",
"metadata": {},
"source": [
"### 1.2. Test inference with Ollama"
]
},
{
"cell_type": "markdown",
"id": "3bf81522",
"metadata": {},
"source": [
"We'll now launch a terminal and run inference on a Llama model with Ollama to verify that the model is working correctly."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a7e8e0f1",
"metadata": {},
"outputs": [],
"source": [
"## If running on Colab:\n",
"# %xterm\n",
"\n",
"## To be run in the terminal:\n",
"# ollama serve &\n",
"# ollama run llama3.2:3b --keepalive 60m"
]
},
{
"cell_type": "markdown",
"id": "f3c5f243",
"metadata": {},
"source": [
"If successful, you should see the model respond to a prompt.\n",
"\n",
"...\n",
"```\n",
">>> hi\n",
"Hello! How can I assist you today?\n",
"```"
]
},
{
"cell_type": "markdown",
"id": "oDUB7M_qe-Gs",
"metadata": {
"id": "oDUB7M_qe-Gs"
},
"source": [
"## Step 2: Run the Llama Stack server\n",
"\n",
"In this showcase, we will start a Llama Stack server that is running locally."
]
},
{
"cell_type": "markdown",
"id": "732eadc6",
"metadata": {},
"source": [
"### 2.1. Setup the Llama Stack Server"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "J2kGed0R5PSf",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"collapsed": true,
"id": "J2kGed0R5PSf",
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
},
"outputs": [],
"source": [
"import os \n",
"import subprocess\n",
"\n",
"if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
"\n",
"# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
"!uv run --with llama-stack llama stack build --template ollama --image-type venv --image-name myvenv\n",
"\n",
"def run_llama_stack_server_background():\n",
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
" process = subprocess.Popen(\n",
" f\"uv run --with llama-stack llama stack run ollama --image-type venv --image-name myvenv --env INFERENCE_MODEL=llama3.2:3b\",\n",
" shell=True,\n",
" stdout=log_file,\n",
" stderr=log_file,\n",
" text=True\n",
" )\n",
" \n",
" print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
" return process\n",
"\n",
"def wait_for_server_to_start():\n",
" import requests\n",
" from requests.exceptions import ConnectionError\n",
" import time\n",
" \n",
" url = \"http://0.0.0.0:8321/v1/health\"\n",
" max_retries = 30\n",
" retry_interval = 1\n",
" \n",
" print(\"Waiting for server to start\", end=\"\")\n",
" for _ in range(max_retries):\n",
" try:\n",
" response = requests.get(url)\n",
" if response.status_code == 200:\n",
" print(\"\\nServer is ready!\")\n",
" return True\n",
" except ConnectionError:\n",
" print(\".\", end=\"\", flush=True)\n",
" time.sleep(retry_interval)\n",
" \n",
" print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
" return False\n",
"\n",
"\n",
"# use this helper if needed to kill the server \n",
"def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes\n",
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
]
},
{
"cell_type": "markdown",
"id": "c40e9efd",
"metadata": {},
"source": [
"### 2.2. Start the Llama Stack Server"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "f779283d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting Llama Stack server with PID: 787100\n",
"Waiting for server to start\n",
"Server is ready!\n"
]
}
],
"source": [
"server_process = run_llama_stack_server_background()\n",
"assert wait_for_server_to_start()"
]
},
{
"cell_type": "markdown",
"id": "28477c03",
"metadata": {},
"source": [
"## Step 3: Run the demo"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "7da71011",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"rag_tool> Ingesting document: https://www.paulgraham.com/greatwork.html\n",
"prompt> How do you do great work?\n",
"\u001b[33minference> \u001b[0m\u001b[33m[k\u001b[0m\u001b[33mnowledge\u001b[0m\u001b[33m_search\u001b[0m\u001b[33m(query\u001b[0m\u001b[33m=\"\u001b[0m\u001b[33mWhat\u001b[0m\u001b[33m is\u001b[0m\u001b[33m the\u001b[0m\u001b[33m key\u001b[0m\u001b[33m to\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m\")]\u001b[0m\u001b[97m\u001b[0m\n",
"\u001b[32mtool_execution> Tool:knowledge_search Args:{'query': 'What is the key to doing great work'}\u001b[0m\n",
"\u001b[32mtool_execution> Tool:knowledge_search Response:[TextContentItem(text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n', type='text'), TextContentItem(text=\"Result 1:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 2:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 3:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 4:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 5:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text='END of knowledge_search tool results.\\n', type='text'), TextContentItem(text='The above results were retrieved to help answer the user\\'s query: \"What is the key to doing great work\". 
Use them as supporting information only in answering this query.\\n', type='text')]\u001b[0m\n",
"\u001b[33minference> \u001b[0m\u001b[33mDoing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m means\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m something\u001b[0m\u001b[33m important\u001b[0m\u001b[33m so\u001b[0m\u001b[33m well\u001b[0m\u001b[33m that\u001b[0m\u001b[33m you\u001b[0m\u001b[33m expand\u001b[0m\u001b[33m people\u001b[0m\u001b[33m's\u001b[0m\u001b[33m ideas\u001b[0m\u001b[33m of\u001b[0m\u001b[33m what\u001b[0m\u001b[33m's\u001b[0m\u001b[33m possible\u001b[0m\u001b[33m.\u001b[0m\u001b[33m However\u001b[0m\u001b[33m,\u001b[0m\u001b[33m there\u001b[0m\u001b[33m's\u001b[0m\u001b[33m no\u001b[0m\u001b[33m threshold\u001b[0m\u001b[33m for\u001b[0m\u001b[33m importance\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m's\u001b[0m\u001b[33m often\u001b[0m\u001b[33m hard\u001b[0m\u001b[33m to\u001b[0m\u001b[33m judge\u001b[0m\u001b[33m at\u001b[0m\u001b[33m the\u001b[0m\u001b[33m time\u001b[0m\u001b[33m anyway\u001b[0m\u001b[33m.\u001b[0m\u001b[33m Great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m matter\u001b[0m\u001b[33m of\u001b[0m\u001b[33m degree\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m can\u001b[0m\u001b[33m be\u001b[0m\u001b[33m difficult\u001b[0m\u001b[33m to\u001b[0m\u001b[33m determine\u001b[0m\u001b[33m whether\u001b[0m\u001b[33m someone\u001b[0m\u001b[33m has\u001b[0m\u001b[33m done\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m until\u001b[0m\u001b[33m after\u001b[0m\u001b[33m the\u001b[0m\u001b[33m fact\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n",
"\u001b[30m\u001b[0m"
]
}
],
"source": [
"from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient\n",
"\n",
"vector_db_id = \"my_demo_vector_db\"\n",
"client = LlamaStackClient(base_url=\"http://0.0.0.0:8321\")\n",
"\n",
"models = client.models.list()\n",
"\n",
"# Select the first LLM and first embedding models\n",
"model_id = next(m for m in models if m.model_type == \"llm\").identifier\n",
"embedding_model_id = (\n",
" em := next(m for m in models if m.model_type == \"embedding\")\n",
").identifier\n",
"embedding_dimension = em.metadata[\"embedding_dimension\"]\n",
"\n",
"_ = client.vector_dbs.register(\n",
" vector_db_id=vector_db_id,\n",
" embedding_model=embedding_model_id,\n",
" embedding_dimension=embedding_dimension,\n",
" provider_id=\"faiss\",\n",
")\n",
"source = \"https://www.paulgraham.com/greatwork.html\"\n",
"print(\"rag_tool> Ingesting document:\", source)\n",
"document = RAGDocument(\n",
" document_id=\"document_1\",\n",
" content=source,\n",
" mime_type=\"text/html\",\n",
" metadata={},\n",
")\n",
"client.tool_runtime.rag_tool.insert(\n",
" documents=[document],\n",
" vector_db_id=vector_db_id,\n",
" chunk_size_in_tokens=50,\n",
")\n",
"agent = Agent(\n",
" client,\n",
" model=model_id,\n",
" instructions=\"You are a helpful assistant\",\n",
" tools=[\n",
" {\n",
" \"name\": \"builtin::rag/knowledge_search\",\n",
" \"args\": {\"vector_db_ids\": [vector_db_id]},\n",
" }\n",
" ],\n",
")\n",
"\n",
"prompt = \"How do you do great work?\"\n",
"print(\"prompt>\", prompt)\n",
"\n",
"response = agent.create_turn(\n",
" messages=[{\"role\": \"user\", \"content\": prompt}],\n",
" session_id=agent.create_session(\"rag_session\"),\n",
" stream=True,\n",
")\n",
"\n",
"for log in AgentEventLogger().log(response):\n",
" log.print()"
]
},
{
"cell_type": "markdown",
"id": "341aaadf",
"metadata": {},
"source": [
"Congratulations! You've successfully built your first RAG application using Llama Stack! 🎉🥳"
]
},
{
"cell_type": "markdown",
"id": "e88e1185",
"metadata": {},
"source": [
"## Next Steps"
]
},
{
"cell_type": "markdown",
"id": "bcb73600",
"metadata": {},
"source": [
"Now you're ready to dive deeper into Llama Stack!\n",
"- Explore the [Detailed Tutorial](./detailed_tutorial.md).\n",
"- Try the [Getting Started Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb).\n",
"- Browse more [Notebooks on GitHub](https://github.com/meta-llama/llama-stack/tree/main/docs/notebooks).\n",
"- Learn about Llama Stack [Concepts](../concepts/index.md).\n",
"- Discover how to [Build Llama Stacks](../distributions/index.md).\n",
"- Refer to our [References](../references/index.md) for details on the Llama CLI and Python SDK.\n",
"- Check out the [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repository for example applications and tutorials."
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -77,10 +77,10 @@ Next up is the most critical part: the set of providers that the stack will use
```yaml ```yaml
providers: providers:
inference: inference:
# provider_id is a string you can choose freely # provider_id is a string you can choose freely
- provider_id: ollama - provider_id: ollama
# provider_type is a string that specifies the type of provider. # provider_type is a string that specifies the type of provider.
# in this case, the provider for inference is ollama and it is run remotely (outside of the distribution) # in this case, the provider for inference is ollama and it runs remotely (outside of the distribution)
provider_type: remote::ollama provider_type: remote::ollama
# config is a dictionary that contains the configuration for the provider. # config is a dictionary that contains the configuration for the provider.
# in this case, the configuration is the url of the ollama server # in this case, the configuration is the url of the ollama server
@ -88,7 +88,7 @@ providers:
url: ${env.OLLAMA_URL:=http://localhost:11434} url: ${env.OLLAMA_URL:=http://localhost:11434}
``` ```
A few things to note: A few things to note:
- A _provider instance_ is identified with an (id, type, configuration) triplet. - A _provider instance_ is identified with an (id, type, config) triplet.
- The id is a string you can choose freely. - The id is a string you can choose freely.
- You can instantiate any number of provider instances of the same type. - You can instantiate any number of provider instances of the same type.
- The configuration dictionary is provider-specific. - The configuration dictionary is provider-specific.
@ -187,7 +187,7 @@ The environment variable substitution system is type-safe:
## Resources ## Resources
Finally, let's look at the `models` section: Let's look at the `models` section:
```yaml ```yaml
models: models:
@ -195,8 +195,9 @@ models:
model_id: ${env.INFERENCE_MODEL} model_id: ${env.INFERENCE_MODEL}
provider_id: ollama provider_id: ollama
provider_model_id: null provider_model_id: null
model_type: llm
``` ```
A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage the clients to always register models before using them, some Stack servers may come up a list of "already known and available" models. A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage the clients to register models before using them, some Stack servers may come up with a list of "already known and available" models.
What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`. What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`.

View file

@ -8,6 +8,8 @@ environments. You can build and test using a local server first and deploy to a
In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/) In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/)
as the inference [provider](../providers/inference/index) for a Llama Model. as the inference [provider](../providers/inference/index) for a Llama Model.
**💡 Notebook Version:** You can also follow this quickstart guide in a Jupyter notebook format: [quick_start.ipynb](https://github.com/meta-llama/llama-stack/blob/main/docs/quick_start.ipynb)
#### Step 1: Install and setup #### Step 1: Install and setup
1. Install [uv](https://docs.astral.sh/uv/) 1. Install [uv](https://docs.astral.sh/uv/)
2. Run inference on a Llama model with [Ollama](https://ollama.com/download) 2. Run inference on a Llama model with [Ollama](https://ollama.com/download)

View file

@ -73,17 +73,26 @@ A number of "adapters" are available for some popular Inference and Vector Store
| OpenAI | Hosted | | OpenAI | Hosted |
| Anthropic | Hosted | | Anthropic | Hosted |
| Gemini | Hosted | | Gemini | Hosted |
| WatsonX | Hosted |
**Agents API**
| **Provider** | **Environments** |
| :----: | :----: |
| Meta Reference | Single Node |
| Fireworks | Hosted |
| Together | Hosted |
| PyTorch ExecuTorch | On-device iOS |
**Vector IO API** **Vector IO API**
| **Provider** | **Environments** | | **Provider** | **Environments** |
| :----: | :----: | | :----: | :----: |
| FAISS | Single Node | | FAISS | Single Node |
| SQLite-Vec| Single Node | | SQLite-Vec | Single Node |
| Chroma | Hosted and Single Node | | Chroma | Hosted and Single Node |
| Milvus | Hosted and Single Node | | Milvus | Hosted and Single Node |
| Postgres (PGVector) | Hosted and Single Node | | Postgres (PGVector) | Hosted and Single Node |
| Weaviate | Hosted | | Weaviate | Hosted |
| Qdrant | Hosted and Single Node |
**Safety API** **Safety API**
| **Provider** | **Environments** | | **Provider** | **Environments** |
@ -93,6 +102,30 @@ A number of "adapters" are available for some popular Inference and Vector Store
| Code Scanner | Single Node | | Code Scanner | Single Node |
| AWS Bedrock | Hosted | | AWS Bedrock | Hosted |
**Post Training API**
| **Provider** | **Environments** |
| :----: | :----: |
| Meta Reference | Single Node |
| HuggingFace | Single Node |
| TorchTune | Single Node |
| NVIDIA NEMO | Hosted |
**Eval API**
| **Provider** | **Environments** |
| :----: | :----: |
| Meta Reference | Single Node |
| NVIDIA NEMO | Hosted |
**Telemetry API**
| **Provider** | **Environments** |
| :----: | :----: |
| Meta Reference | Single Node |
**Tool Runtime API**
| **Provider** | **Environments** |
| :----: | :----: |
| Brave Search | Hosted |
| RAG Runtime | Single Node |
```{toctree} ```{toctree}
:hidden: :hidden:

View file

@ -16,7 +16,6 @@ Meta's reference implementation of an agent system that can use tools, access ve
```yaml ```yaml
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite

View file

@ -15,7 +15,6 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to
```yaml ```yaml
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db
``` ```

View file

@ -15,7 +15,6 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi
```yaml ```yaml
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db
``` ```

View file

@ -15,7 +15,6 @@ Meta's reference implementation of evaluation tasks with support for multiple la
```yaml ```yaml
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db
``` ```

View file

@ -1,9 +1,10 @@
# Providers Overview # Providers Overview
The goal of Llama Stack is to build an ecosystem where users can easily swap out different implementations for the same API. Examples for these include: The goal of Llama Stack is to build an ecosystem where users can easily swap out different implementations for the same API. Examples for these include:
- LLM inference providers (e.g., Ollama, Fireworks, Together, AWS Bedrock, Groq, Cerebras, SambaNova, vLLM, etc.), - LLM inference providers (e.g., Meta Reference, Ollama, Fireworks, Together, AWS Bedrock, Groq, Cerebras, SambaNova, vLLM, OpenAI, Anthropic, Gemini, WatsonX, etc.),
- Vector databases (e.g., ChromaDB, Weaviate, Qdrant, Milvus, FAISS, PGVector, SQLite-Vec, etc.), - Vector databases (e.g., FAISS, SQLite-Vec, ChromaDB, Weaviate, Qdrant, Milvus, PGVector, etc.),
- Safety providers (e.g., Meta's Llama Guard, AWS Bedrock Guardrails, etc.) - Safety providers (e.g., Meta's Llama Guard, Prompt Guard, Code Scanner, AWS Bedrock Guardrails, etc.),
- Tool Runtime providers (e.g., RAG Runtime, Brave Search, etc.)
Providers come in two flavors: Providers come in two flavors:
- **Remote**: the provider runs as a separate service external to the Llama Stack codebase. Llama Stack contains a small amount of adapter code. - **Remote**: the provider runs as a separate service external to the Llama Stack codebase. Llama Stack contains a small amount of adapter code.

View file

@ -44,7 +44,6 @@ more details about Faiss in general.
```yaml ```yaml
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
``` ```

View file

@ -15,7 +15,6 @@ Meta's reference implementation of a vector database.
```yaml ```yaml
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
``` ```

View file

@ -19,7 +19,6 @@ Please refer to the remote provider documentation.
db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy}/milvus.db db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy}/milvus.db
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_registry.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_registry.db
``` ```

View file

@ -45,7 +45,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 14,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -65,7 +65,7 @@
"from dotenv import load_dotenv\n", "from dotenv import load_dotenv\n",
"\n", "\n",
"load_dotenv()\n", "load_dotenv()\n",
"BRAVE_SEARCH_API_KEY = os.environ[\"BRAVE_SEARCH_API_KEY\"]\n" "TAVILY_SEARCH_API_KEY = os.environ[\"TAVILY_SEARCH_API_KEY\"]\n"
] ]
}, },
{ {
@ -110,10 +110,17 @@
"from llama_stack_client import LlamaStackClient\n", "from llama_stack_client import LlamaStackClient\n",
"from llama_stack_client.lib.agents.agent import Agent\n", "from llama_stack_client.lib.agents.agent import Agent\n",
"from llama_stack_client.lib.agents.event_logger import EventLogger\n", "from llama_stack_client.lib.agents.event_logger import EventLogger\n",
"from llama_stack_client.types import UserMessage\n",
"from typing import cast, Iterator\n",
"\n", "\n",
"\n", "\n",
"async def agent_example():\n", "async def agent_example():\n",
" client = LlamaStackClient(base_url=f\"http://{HOST}:{PORT}\")\n", " client = LlamaStackClient(\n",
" base_url=f\"http://{HOST}:{PORT}\",\n",
" provider_data={\n",
" \"tavily_search_api_key\": TAVILY_SEARCH_API_KEY,\n",
" }\n",
" )\n",
" agent = Agent(\n", " agent = Agent(\n",
" client,\n", " client,\n",
" model=MODEL_NAME,\n", " model=MODEL_NAME,\n",
@ -123,13 +130,7 @@
" \"type\": \"greedy\",\n", " \"type\": \"greedy\",\n",
" },\n", " },\n",
" },\n", " },\n",
" tools=[\n", " tools=[\"builtin::websearch\"],\n",
" {\n",
" \"type\": \"brave_search\",\n",
" \"engine\": \"brave\",\n",
" \"api_key\": BRAVE_SEARCH_API_KEY,\n",
" }\n",
" ],\n",
" )\n", " )\n",
" session_id = agent.create_session(\"test-session\")\n", " session_id = agent.create_session(\"test-session\")\n",
" print(f\"Created session_id={session_id} for Agent({agent.agent_id})\")\n", " print(f\"Created session_id={session_id} for Agent({agent.agent_id})\")\n",
@ -142,15 +143,13 @@
" for prompt in user_prompts:\n", " for prompt in user_prompts:\n",
" response = agent.create_turn(\n", " response = agent.create_turn(\n",
" messages=[\n", " messages=[\n",
" {\n", " UserMessage(role=\"user\", content=prompt)\n",
" \"role\": \"user\",\n",
" \"content\": prompt,\n",
" }\n",
" ],\n", " ],\n",
" session_id=session_id,\n", " session_id=session_id,\n",
" stream=True,\n",
" )\n", " )\n",
"\n", "\n",
" async for log in EventLogger().log(response):\n", " for log in EventLogger().log(cast(Iterator, response)):\n",
" log.print()\n", " log.print()\n",
"\n", "\n",
"\n", "\n",

View file

@ -101,7 +101,7 @@ class MetricInResponse(BaseModel):
# This is a short term solution to allow inference API to return metrics # This is a short term solution to allow inference API to return metrics
# The ideal way to do this is to have a way for all response types to include metrics # The ideal way to do this is to have a way for all response types to include metrics
# and all metric events logged to the telemetry API to be inlcuded with the response # and all metric events logged to the telemetry API to be included with the response
# To do this, we will need to augment all response types with a metrics field. # To do this, we will need to augment all response types with a metrics field.
# We have hit a blocker from stainless SDK that prevents us from doing this. # We have hit a blocker from stainless SDK that prevents us from doing this.
# The blocker is that if we were to augment the response types that have a data field # The blocker is that if we were to augment the response types that have a data field

View file

@ -106,4 +106,26 @@ def is_action_allowed(
class AccessDeniedError(RuntimeError): class AccessDeniedError(RuntimeError):
pass def __init__(self, action: str | None = None, resource: ProtectedResource | None = None, user: User | None = None):
self.action = action
self.resource = resource
self.user = user
message = _build_access_denied_message(action, resource, user)
super().__init__(message)
def _build_access_denied_message(action: str | None, resource: ProtectedResource | None, user: User | None) -> str:
    """Compose the text carried by an access-denied error.

    A detailed message naming the user, the attempted action and the target
    resource is produced only when all three arguments are truthy; in every
    other case a generic fallback string is returned.
    """
    # Without the full (action, resource, user) triple there is nothing specific to report.
    if not (action and resource and user):
        return "Insufficient permissions"

    resource_info = f"{resource.type}::{resource.identifier}"
    user_info = f"'{user.principal}'"
    if user.attributes:
        # Render attributes as "key=value" pairs in the mapping's own iteration order.
        attrs = ", ".join(f"{k}={v}" for k, v in user.attributes.items())
        user_info += f" (attributes: {attrs})"
    return f"User {user_info} cannot perform action '{action}' on resource '{resource_info}'"

View file

@ -17,6 +17,7 @@ from llama_stack.distribution.distribution import (
builtin_automatically_routed_apis, builtin_automatically_routed_apis,
get_provider_registry, get_provider_registry,
) )
from llama_stack.distribution.stack import replace_env_vars
from llama_stack.distribution.utils.config_dirs import EXTERNAL_PROVIDERS_DIR from llama_stack.distribution.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
from llama_stack.distribution.utils.dynamic import instantiate_class_type from llama_stack.distribution.utils.dynamic import instantiate_class_type
from llama_stack.distribution.utils.prompt_for_config import prompt_for_config from llama_stack.distribution.utils.prompt_for_config import prompt_for_config
@ -163,7 +164,7 @@ def upgrade_from_routing_table(
def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig: def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig:
version = config_dict.get("version", None) version = config_dict.get("version", None)
if version == LLAMA_STACK_RUN_CONFIG_VERSION: if version == LLAMA_STACK_RUN_CONFIG_VERSION:
return StackRunConfig(**config_dict) return StackRunConfig(**replace_env_vars(config_dict))
if "routing_table" in config_dict: if "routing_table" in config_dict:
logger.info("Upgrading config...") logger.info("Upgrading config...")
@ -174,4 +175,4 @@ def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfi
if not config_dict.get("external_providers_dir", None): if not config_dict.get("external_providers_dir", None):
config_dict["external_providers_dir"] = EXTERNAL_PROVIDERS_DIR config_dict["external_providers_dir"] = EXTERNAL_PROVIDERS_DIR
return StackRunConfig(**config_dict) return StackRunConfig(**replace_env_vars(config_dict))

View file

@ -175,8 +175,9 @@ class CommonRoutingTableImpl(RoutingTable):
return obj return obj
async def unregister_object(self, obj: RoutableObjectWithProvider) -> None: async def unregister_object(self, obj: RoutableObjectWithProvider) -> None:
if not is_action_allowed(self.policy, "delete", obj, get_authenticated_user()): user = get_authenticated_user()
raise AccessDeniedError() if not is_action_allowed(self.policy, "delete", obj, user):
raise AccessDeniedError("delete", obj, user)
await self.dist_registry.delete(obj.type, obj.identifier) await self.dist_registry.delete(obj.type, obj.identifier)
await unregister_object_from_provider(obj, self.impls_by_provider_id[obj.provider_id]) await unregister_object_from_provider(obj, self.impls_by_provider_id[obj.provider_id])
@ -193,7 +194,7 @@ class CommonRoutingTableImpl(RoutingTable):
# If object supports access control but no attributes set, use creator's attributes # If object supports access control but no attributes set, use creator's attributes
creator = get_authenticated_user() creator = get_authenticated_user()
if not is_action_allowed(self.policy, "create", obj, creator): if not is_action_allowed(self.policy, "create", obj, creator):
raise AccessDeniedError() raise AccessDeniedError("create", obj, creator)
if creator: if creator:
obj.owner = creator obj.owner = creator
logger.info(f"Setting owner for {obj.type} '{obj.identifier}' to {obj.owner.principal}") logger.info(f"Setting owner for {obj.type} '{obj.identifier}' to {obj.owner.principal}")

View file

@ -9,6 +9,7 @@ import asyncio
import functools import functools
import inspect import inspect
import json import json
import logging
import os import os
import ssl import ssl
import sys import sys
@ -31,6 +32,7 @@ from openai import BadRequestError
from pydantic import BaseModel, ValidationError from pydantic import BaseModel, ValidationError
from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.distribution.access_control.access_control import AccessDeniedError
from llama_stack.distribution.datatypes import AuthenticationRequiredError, LoggingConfig, StackRunConfig from llama_stack.distribution.datatypes import AuthenticationRequiredError, LoggingConfig, StackRunConfig
from llama_stack.distribution.distribution import builtin_automatically_routed_apis from llama_stack.distribution.distribution import builtin_automatically_routed_apis
from llama_stack.distribution.request_headers import PROVIDER_DATA_VAR, User, request_provider_data_context from llama_stack.distribution.request_headers import PROVIDER_DATA_VAR, User, request_provider_data_context
@ -116,7 +118,7 @@ def translate_exception(exc: Exception) -> HTTPException | RequestValidationErro
return HTTPException(status_code=400, detail=f"Invalid value: {str(exc)}") return HTTPException(status_code=400, detail=f"Invalid value: {str(exc)}")
elif isinstance(exc, BadRequestError): elif isinstance(exc, BadRequestError):
return HTTPException(status_code=400, detail=str(exc)) return HTTPException(status_code=400, detail=str(exc))
elif isinstance(exc, PermissionError): elif isinstance(exc, PermissionError | AccessDeniedError):
return HTTPException(status_code=403, detail=f"Permission denied: {str(exc)}") return HTTPException(status_code=403, detail=f"Permission denied: {str(exc)}")
elif isinstance(exc, asyncio.TimeoutError | TimeoutError): elif isinstance(exc, asyncio.TimeoutError | TimeoutError):
return HTTPException(status_code=504, detail=f"Operation timed out: {str(exc)}") return HTTPException(status_code=504, detail=f"Operation timed out: {str(exc)}")
@ -236,7 +238,10 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
result.url = route result.url = route
return result return result
except Exception as e: except Exception as e:
logger.exception(f"Error executing endpoint {route=} {method=}") if logger.isEnabledFor(logging.DEBUG):
logger.exception(f"Error executing endpoint {route=} {method=}")
else:
logger.error(f"Error executing endpoint {route=} {method=}: {str(e)}")
raise translate_exception(e) from e raise translate_exception(e) from e
sig = inspect.signature(func) sig = inspect.signature(func)

View file

@ -10,11 +10,11 @@ from typing import Protocol
import pydantic import pydantic
from llama_stack.distribution.datatypes import KVStoreConfig, RoutableObjectWithProvider from llama_stack.distribution.datatypes import RoutableObjectWithProvider
from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
logger = get_logger(__name__, category="core") logger = get_logger(__name__, category="core")

View file

@ -53,7 +53,7 @@ class AgentPersistence:
identifier=name, # should this be qualified in any way? identifier=name, # should this be qualified in any way?
) )
if not is_action_allowed(self.policy, "create", session_info, user): if not is_action_allowed(self.policy, "create", session_info, user):
raise AccessDeniedError() raise AccessDeniedError("create", session_info, user)
await self.kvstore.set( await self.kvstore.set(
key=f"session:{self.agent_id}:{session_id}", key=f"session:{self.agent_id}:{session_id}",

View file

@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]:
"pillow", "pillow",
"pandas", "pandas",
"scikit-learn", "scikit-learn",
"mcp",
] ]
+ kvstore_dependencies(), # TODO make this dynamic based on the kvstore config + kvstore_dependencies(), # TODO make this dynamic based on the kvstore config
module="llama_stack.providers.inline.agents.meta_reference", module="llama_stack.providers.inline.agents.meta_reference",

View file

@ -36,15 +36,14 @@ class RedisKVStoreConfig(CommonConfig):
def url(self) -> str: def url(self) -> str:
return f"redis://{self.host}:{self.port}" return f"redis://{self.host}:{self.port}"
@property @classmethod
def pip_packages(self) -> list[str]: def pip_packages(cls) -> list[str]:
return ["redis"] return ["redis"]
@classmethod @classmethod
def sample_run_config(cls): def sample_run_config(cls):
return { return {
"type": "redis", "type": "redis",
"namespace": None,
"host": "${env.REDIS_HOST:=localhost}", "host": "${env.REDIS_HOST:=localhost}",
"port": "${env.REDIS_PORT:=6379}", "port": "${env.REDIS_PORT:=6379}",
} }
@ -57,15 +56,14 @@ class SqliteKVStoreConfig(CommonConfig):
description="File path for the sqlite database", description="File path for the sqlite database",
) )
@property @classmethod
def pip_packages(self) -> list[str]: def pip_packages(cls) -> list[str]:
return ["aiosqlite"] return ["aiosqlite"]
@classmethod @classmethod
def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"): def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"):
return { return {
"type": "sqlite", "type": "sqlite",
"namespace": None,
"db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
} }
@ -73,7 +71,7 @@ class SqliteKVStoreConfig(CommonConfig):
class PostgresKVStoreConfig(CommonConfig): class PostgresKVStoreConfig(CommonConfig):
type: Literal[KVStoreType.postgres.value] = KVStoreType.postgres.value type: Literal[KVStoreType.postgres.value] = KVStoreType.postgres.value
host: str = "localhost" host: str = "localhost"
port: str = "5432" port: int = 5432
db: str = "llamastack" db: str = "llamastack"
user: str user: str
password: str | None = None password: str | None = None
@ -83,7 +81,6 @@ class PostgresKVStoreConfig(CommonConfig):
def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs): def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs):
return { return {
"type": "postgres", "type": "postgres",
"namespace": None,
"host": "${env.POSTGRES_HOST:=localhost}", "host": "${env.POSTGRES_HOST:=localhost}",
"port": "${env.POSTGRES_PORT:=5432}", "port": "${env.POSTGRES_PORT:=5432}",
"db": "${env.POSTGRES_DB:=llamastack}", "db": "${env.POSTGRES_DB:=llamastack}",
@ -108,8 +105,8 @@ class PostgresKVStoreConfig(CommonConfig):
raise ValueError("Table name must be less than 63 characters") raise ValueError("Table name must be less than 63 characters")
return v return v
@property @classmethod
def pip_packages(self) -> list[str]: def pip_packages(cls) -> list[str]:
return ["psycopg2-binary"] return ["psycopg2-binary"]
@ -122,15 +119,14 @@ class MongoDBKVStoreConfig(CommonConfig):
password: str | None = None password: str | None = None
collection_name: str = "llamastack_kvstore" collection_name: str = "llamastack_kvstore"
@property @classmethod
def pip_packages(self) -> list[str]: def pip_packages(cls) -> list[str]:
return ["pymongo"] return ["pymongo"]
@classmethod @classmethod
def sample_run_config(cls, collection_name: str = "llamastack_kvstore"): def sample_run_config(cls, collection_name: str = "llamastack_kvstore"):
return { return {
"type": "mongodb", "type": "mongodb",
"namespace": None,
"host": "${env.MONGODB_HOST:=localhost}", "host": "${env.MONGODB_HOST:=localhost}",
"port": "${env.MONGODB_PORT:=5432}", "port": "${env.MONGODB_PORT:=5432}",
"db": "${env.MONGODB_DB}", "db": "${env.MONGODB_DB}",
@ -144,3 +140,21 @@ KVStoreConfig = Annotated[
RedisKVStoreConfig | SqliteKVStoreConfig | PostgresKVStoreConfig | MongoDBKVStoreConfig, RedisKVStoreConfig | SqliteKVStoreConfig | PostgresKVStoreConfig | MongoDBKVStoreConfig,
Field(discriminator="type", default=KVStoreType.sqlite.value), Field(discriminator="type", default=KVStoreType.sqlite.value),
] ]
def get_pip_packages(store_config: dict | KVStoreConfig) -> list[str]:
"""Get pip packages for KV store config, handling both dict and object cases."""
if isinstance(store_config, dict):
store_type = store_config.get("type")
if store_type == "sqlite":
return SqliteKVStoreConfig.pip_packages()
elif store_type == "postgres":
return PostgresKVStoreConfig.pip_packages()
elif store_type == "redis":
return RedisKVStoreConfig.pip_packages()
elif store_type == "mongodb":
return MongoDBKVStoreConfig.pip_packages()
else:
raise ValueError(f"Unknown KV store type: {store_type}")
else:
return store_config.pip_packages()

View file

@ -30,8 +30,8 @@ class SqlAlchemySqlStoreConfig(BaseModel):
def engine_str(self) -> str: ... def engine_str(self) -> str: ...
# TODO: move this when we have a better way to specify dependencies with internal APIs # TODO: move this when we have a better way to specify dependencies with internal APIs
@property @classmethod
def pip_packages(self) -> list[str]: def pip_packages(cls) -> list[str]:
return ["sqlalchemy[asyncio]"] return ["sqlalchemy[asyncio]"]
@ -48,20 +48,20 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
@classmethod @classmethod
def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"): def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"):
return cls( return {
type="sqlite", "type": "sqlite",
db_path="${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
) }
@property @classmethod
def pip_packages(self) -> list[str]: def pip_packages(cls) -> list[str]:
return super().pip_packages + ["aiosqlite"] return super().pip_packages() + ["aiosqlite"]
class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
type: Literal["postgres"] = SqlStoreType.postgres.value type: Literal["postgres"] = SqlStoreType.postgres.value
host: str = "localhost" host: str = "localhost"
port: str = "5432" port: int = 5432
db: str = "llamastack" db: str = "llamastack"
user: str user: str
password: str | None = None password: str | None = None
@ -70,20 +70,20 @@ class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
def engine_str(self) -> str: def engine_str(self) -> str:
return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}" return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}"
@property @classmethod
def pip_packages(self) -> list[str]: def pip_packages(cls) -> list[str]:
return super().pip_packages + ["asyncpg"] return super().pip_packages() + ["asyncpg"]
@classmethod @classmethod
def sample_run_config(cls, **kwargs): def sample_run_config(cls, **kwargs):
return cls( return {
type="postgres", "type": "postgres",
host="${env.POSTGRES_HOST:=localhost}", "host": "${env.POSTGRES_HOST:=localhost}",
port="${env.POSTGRES_PORT:=5432}", "port": "${env.POSTGRES_PORT:=5432}",
db="${env.POSTGRES_DB:=llamastack}", "db": "${env.POSTGRES_DB:=llamastack}",
user="${env.POSTGRES_USER:=llamastack}", "user": "${env.POSTGRES_USER:=llamastack}",
password="${env.POSTGRES_PASSWORD:=llamastack}", "password": "${env.POSTGRES_PASSWORD:=llamastack}",
) }
SqlStoreConfig = Annotated[ SqlStoreConfig = Annotated[
@ -92,6 +92,20 @@ SqlStoreConfig = Annotated[
] ]
def get_pip_packages(store_config: dict | SqlStoreConfig) -> list[str]:
"""Get pip packages for SQL store config, handling both dict and object cases."""
if isinstance(store_config, dict):
store_type = store_config.get("type")
if store_type == "sqlite":
return SqliteSqlStoreConfig.pip_packages()
elif store_type == "postgres":
return PostgresSqlStoreConfig.pip_packages()
else:
raise ValueError(f"Unknown SQL store type: {store_type}")
else:
return store_config.pip_packages()
def sqlstore_impl(config: SqlStoreConfig) -> SqlStore: def sqlstore_impl(config: SqlStoreConfig) -> SqlStore:
if config.type in [SqlStoreType.sqlite.value, SqlStoreType.postgres.value]: if config.type in [SqlStoreType.sqlite.value, SqlStoreType.postgres.value]:
from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl

View file

@ -9,6 +9,11 @@ import uuid
def generate_chunk_id(document_id: str, chunk_text: str) -> str: def generate_chunk_id(document_id: str, chunk_text: str) -> str:
"""Generate a unique chunk ID using a hash of document ID and chunk text.""" """
Generate a unique chunk ID using a hash of the document ID and chunk text.
Note: MD5 is used only to calculate an identifier, not for security purposes.
Adding usedforsecurity=False for compatibility with FIPS environments.
"""
hash_input = f"{document_id}:{chunk_text}".encode() hash_input = f"{document_id}:{chunk_text}".encode()
return str(uuid.UUID(hashlib.md5(hash_input).hexdigest())) return str(uuid.UUID(hashlib.md5(hash_input, usedforsecurity=False).hexdigest()))

View file

@ -21,7 +21,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/faiss_store.db
safety: safety:
- provider_id: bedrock - provider_id: bedrock
@ -33,7 +32,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -51,7 +49,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -59,14 +56,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -31,7 +31,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/faiss_store.db
agents: agents:
- provider_id: meta-reference - provider_id: meta-reference
@ -39,7 +38,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -50,7 +48,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -58,14 +55,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -36,7 +36,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -54,7 +53,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -62,14 +60,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -39,7 +39,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -57,7 +56,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -65,14 +63,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -35,7 +35,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -53,7 +52,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -61,14 +59,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -27,7 +27,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -45,7 +44,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -63,7 +61,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -71,14 +68,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -27,7 +27,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -40,7 +39,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -58,7 +56,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -66,14 +63,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -39,7 +38,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -57,7 +55,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -65,14 +62,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -31,7 +31,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -44,7 +43,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -62,7 +60,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -70,14 +67,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -39,7 +38,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -57,7 +55,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -65,14 +62,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -31,7 +31,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -44,7 +43,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -62,7 +60,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -70,14 +67,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -39,7 +38,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -57,7 +55,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -65,14 +62,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -48,7 +48,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -66,7 +65,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -74,14 +72,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -41,7 +41,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -54,7 +53,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -72,7 +70,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -80,14 +77,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -31,7 +31,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -44,7 +43,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -62,7 +60,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -70,14 +67,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -30,7 +30,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db
safety: safety:
- provider_id: nvidia - provider_id: nvidia
@ -44,7 +43,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -75,7 +73,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db
- provider_id: nvidia - provider_id: nvidia
provider_type: remote::nvidia provider_type: remote::nvidia

View file

@ -25,7 +25,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db
safety: safety:
- provider_id: nvidia - provider_id: nvidia
@ -39,7 +38,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite

View file

@ -25,7 +25,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -40,7 +39,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -58,7 +56,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -66,14 +63,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -25,7 +25,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -38,7 +37,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -56,7 +54,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -64,14 +61,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -62,7 +62,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -80,7 +79,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -88,14 +86,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -44,7 +43,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -62,7 +60,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -70,14 +67,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -39,7 +38,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -57,7 +55,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -65,14 +62,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -114,7 +114,7 @@ def get_distribution_template() -> DistributionTemplate:
provider_id="meta-reference", provider_id="meta-reference",
provider_type="inline::meta-reference", provider_type="inline::meta-reference",
config=dict( config=dict(
service_name="${env.OTEL_SERVICE_NAME:=}", service_name="${env.OTEL_SERVICE_NAME:=\u200b}",
sinks="${env.TELEMETRY_SINKS:=console,otel_trace}", sinks="${env.TELEMETRY_SINKS:=console,otel_trace}",
otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}", otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}",
), ),

View file

@ -51,7 +51,7 @@ providers:
- provider_id: meta-reference - provider_id: meta-reference
provider_type: inline::meta-reference provider_type: inline::meta-reference
config: config:
service_name: ${env.OTEL_SERVICE_NAME:=} service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console,otel_trace} sinks: ${env.TELEMETRY_SINKS:=console,otel_trace}
otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces} otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}
tool_runtime: tool_runtime:

View file

@ -35,7 +35,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -48,7 +47,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -59,7 +57,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -67,14 +64,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -28,7 +28,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -41,7 +40,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -52,7 +50,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -60,14 +57,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -23,7 +23,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/faiss_store.db
- provider_id: ${env.ENABLE_CHROMADB:+chromadb} - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
provider_type: remote::chromadb provider_type: remote::chromadb
@ -49,7 +48,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite

View file

@ -66,7 +66,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
- provider_id: ${env.ENABLE_SQLITE_VEC:+sqlite-vec} - provider_id: ${env.ENABLE_SQLITE_VEC:+sqlite-vec}
provider_type: inline::sqlite-vec provider_type: inline::sqlite-vec
@ -78,7 +77,6 @@ providers:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db
- provider_id: ${env.ENABLE_CHROMADB:+chromadb} - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
provider_type: remote::chromadb provider_type: remote::chromadb
@ -111,7 +109,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -129,7 +126,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -137,14 +133,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -234,7 +234,6 @@ def get_distribution_template() -> DistributionTemplate:
default_models = get_model_registry(available_models) default_models = get_model_registry(available_models)
postgres_store = PostgresSqlStoreConfig.sample_run_config()
return DistributionTemplate( return DistributionTemplate(
name=name, name=name,
distro_type="self_hosted", distro_type="self_hosted",
@ -243,7 +242,7 @@ def get_distribution_template() -> DistributionTemplate:
template_path=None, template_path=None,
providers=providers, providers=providers,
available_models_by_provider=available_models, available_models_by_provider=available_models,
additional_pip_packages=postgres_store.pip_packages, additional_pip_packages=PostgresSqlStoreConfig.pip_packages(),
run_configs={ run_configs={
"run.yaml": RunConfigSettings( "run.yaml": RunConfigSettings(
provider_overrides={ provider_overrides={

View file

@ -15,6 +15,7 @@ from pydantic import BaseModel, Field
from llama_stack.apis.datasets import DatasetPurpose from llama_stack.apis.datasets import DatasetPurpose
from llama_stack.apis.models import ModelType from llama_stack.apis.models import ModelType
from llama_stack.distribution.datatypes import ( from llama_stack.distribution.datatypes import (
LLAMA_STACK_RUN_CONFIG_VERSION,
Api, Api,
BenchmarkInput, BenchmarkInput,
BuildConfig, BuildConfig,
@ -23,14 +24,15 @@ from llama_stack.distribution.datatypes import (
ModelInput, ModelInput,
Provider, Provider,
ShieldInput, ShieldInput,
StackRunConfig,
ToolGroupInput, ToolGroupInput,
) )
from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.distribution import get_provider_registry
from llama_stack.distribution.utils.dynamic import instantiate_class_type from llama_stack.distribution.utils.dynamic import instantiate_class_type
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig from llama_stack.providers.utils.kvstore.config import get_pip_packages as get_kv_pip_packages
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages
def get_model_registry( def get_model_registry(
@ -87,21 +89,24 @@ class RunConfigSettings(BaseModel):
default_tool_groups: list[ToolGroupInput] | None = None default_tool_groups: list[ToolGroupInput] | None = None
default_datasets: list[DatasetInput] | None = None default_datasets: list[DatasetInput] | None = None
default_benchmarks: list[BenchmarkInput] | None = None default_benchmarks: list[BenchmarkInput] | None = None
metadata_store: KVStoreConfig | None = None metadata_store: dict | None = None
inference_store: SqlStoreConfig | None = None inference_store: dict | None = None
def run_config( def run_config(
self, self,
name: str, name: str,
providers: dict[str, list[str]], providers: dict[str, list[str]],
container_image: str | None = None, container_image: str | None = None,
) -> StackRunConfig: ) -> dict:
provider_registry = get_provider_registry() provider_registry = get_provider_registry()
provider_configs = {} provider_configs = {}
for api_str, provider_types in providers.items(): for api_str, provider_types in providers.items():
if api_providers := self.provider_overrides.get(api_str): if api_providers := self.provider_overrides.get(api_str):
provider_configs[api_str] = api_providers # Convert Provider objects to dicts for YAML serialization
provider_configs[api_str] = [
p.model_dump(exclude_none=True) if isinstance(p, Provider) else p for p in api_providers
]
continue continue
provider_configs[api_str] = [] provider_configs[api_str] = []
@ -128,33 +133,40 @@ class RunConfigSettings(BaseModel):
provider_id=provider_id, provider_id=provider_id,
provider_type=provider_type, provider_type=provider_type,
config=config, config=config,
) ).model_dump(exclude_none=True)
) )
# Get unique set of APIs from providers # Get unique set of APIs from providers
apis = sorted(providers.keys()) apis = sorted(providers.keys())
return StackRunConfig( # Return a dict that matches StackRunConfig structure
image_name=name, return {
container_image=container_image, "version": LLAMA_STACK_RUN_CONFIG_VERSION,
apis=apis, "image_name": name,
providers=provider_configs, "container_image": container_image,
metadata_store=self.metadata_store "apis": apis,
"providers": provider_configs,
"metadata_store": self.metadata_store
or SqliteKVStoreConfig.sample_run_config( or SqliteKVStoreConfig.sample_run_config(
__distro_dir__=f"~/.llama/distributions/{name}", __distro_dir__=f"~/.llama/distributions/{name}",
db_name="registry.db", db_name="registry.db",
), ),
inference_store=self.inference_store "inference_store": self.inference_store
or SqliteSqlStoreConfig.sample_run_config( or SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=f"~/.llama/distributions/{name}", __distro_dir__=f"~/.llama/distributions/{name}",
db_name="inference_store.db", db_name="inference_store.db",
), ),
models=self.default_models or [], "models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])],
shields=self.default_shields or [], "shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])],
tool_groups=self.default_tool_groups or [], "vector_dbs": [],
datasets=self.default_datasets or [], "datasets": [d.model_dump(exclude_none=True) for d in (self.default_datasets or [])],
benchmarks=self.default_benchmarks or [], "scoring_fns": [],
) "benchmarks": [b.model_dump(exclude_none=True) for b in (self.default_benchmarks or [])],
"tool_groups": [t.model_dump(exclude_none=True) for t in (self.default_tool_groups or [])],
"server": {
"port": 8321,
},
}
class DistributionTemplate(BaseModel): class DistributionTemplate(BaseModel):
@ -190,10 +202,12 @@ class DistributionTemplate(BaseModel):
# TODO: This is a hack to get the dependencies for internal APIs into build # TODO: This is a hack to get the dependencies for internal APIs into build
# We should have a better way to do this by formalizing the concept of "internal" APIs # We should have a better way to do this by formalizing the concept of "internal" APIs
# and providers, with a way to specify dependencies for them. # and providers, with a way to specify dependencies for them.
if run_config_.inference_store:
additional_pip_packages.extend(run_config_.inference_store.pip_packages) if run_config_.get("inference_store"):
if run_config_.metadata_store: additional_pip_packages.extend(get_sql_pip_packages(run_config_["inference_store"]))
additional_pip_packages.extend(run_config_.metadata_store.pip_packages)
if run_config_.get("metadata_store"):
additional_pip_packages.extend(get_kv_pip_packages(run_config_["metadata_store"]))
if self.additional_pip_packages: if self.additional_pip_packages:
additional_pip_packages.extend(self.additional_pip_packages) additional_pip_packages.extend(self.additional_pip_packages)
@ -286,7 +300,7 @@ class DistributionTemplate(BaseModel):
run_config = settings.run_config(self.name, self.providers, self.container_image) run_config = settings.run_config(self.name, self.providers, self.container_image)
with open(yaml_output_dir / yaml_pth, "w") as f: with open(yaml_output_dir / yaml_pth, "w") as f:
yaml.safe_dump( yaml.safe_dump(
run_config.model_dump(exclude_none=True), {k: v for k, v in run_config.items() if v is not None},
f, f,
sort_keys=False, sort_keys=False,
) )

View file

@ -26,7 +26,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -39,7 +38,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -57,7 +55,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -65,14 +62,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -25,7 +25,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -38,7 +37,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -56,7 +54,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -64,14 +61,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -44,7 +43,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -62,7 +60,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -70,14 +67,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -26,7 +26,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -39,7 +38,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -57,7 +55,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -65,14 +62,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -30,7 +30,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -43,7 +42,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -61,7 +59,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -69,14 +66,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -27,7 +27,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
@ -40,7 +39,6 @@ providers:
config: config:
persistence_store: persistence_store:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db
responses_store: responses_store:
type: sqlite type: sqlite
@ -58,7 +56,6 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db
datasetio: datasetio:
- provider_id: huggingface - provider_id: huggingface
@ -66,14 +63,12 @@ providers:
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db
- provider_id: localfs - provider_id: localfs
provider_type: inline::localfs provider_type: inline::localfs
config: config:
kvstore: kvstore:
type: sqlite type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db
scoring: scoring:
- provider_id: basic - provider_id: basic

View file

@ -9,7 +9,9 @@ pytest --help
``` ```
Here are the most important options: Here are the most important options:
- `--stack-config`: specify the stack config to use. You have three ways to point to a stack: - `--stack-config`: specify the stack config to use. You have four ways to point to a stack:
- **`server:<config>`** - automatically start a server with the given config (e.g., `server:fireworks`). This provides one-step testing by auto-starting the server if the port is available, or reusing an existing server if already running.
- **`server:<config>:<port>`** - same as above but with a custom port (e.g., `server:together:8322`)
- a URL which points to a Llama Stack distribution server - a URL which points to a Llama Stack distribution server
- a template (e.g., `fireworks`, `together`) or a path to a `run.yaml` file - a template (e.g., `fireworks`, `together`) or a path to a `run.yaml` file
- a comma-separated list of api=provider pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface. - a comma-separated list of api=provider pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface.
@ -26,12 +28,39 @@ Model parameters can be influenced by the following options:
Each of these are comma-separated lists and can be used to generate multiple parameter combinations. Note that tests will be skipped Each of these are comma-separated lists and can be used to generate multiple parameter combinations. Note that tests will be skipped
if no model is specified. if no model is specified.
Experimental options (still under development):
- `--record-responses`: record new API responses instead of using cached ones
## Examples ## Examples
### Testing against a Server
Run all text inference tests by auto-starting a server with the `fireworks` config:
```bash
pytest -s -v tests/integration/inference/test_text_inference.py \
--stack-config=server:fireworks \
--text-model=meta-llama/Llama-3.1-8B-Instruct
```
Run tests with auto-server startup on a custom port:
```bash
pytest -s -v tests/integration/inference/ \
--stack-config=server:together:8322 \
--text-model=meta-llama/Llama-3.1-8B-Instruct
```
Run multiple test suites with auto-server (eliminates manual server management):
```bash
# Auto-start server and run all integration tests
export FIREWORKS_API_KEY=<your_key>
pytest -s -v tests/integration/inference/ tests/integration/safety/ tests/integration/agents/ \
--stack-config=server:fireworks \
--text-model=meta-llama/Llama-3.1-8B-Instruct
```
### Testing with Library Client
Run all text inference tests with the `together` distribution: Run all text inference tests with the `together` distribution:
```bash ```bash

View file

@ -6,9 +6,13 @@
import inspect import inspect
import os import os
import socket
import subprocess
import tempfile import tempfile
import time
import pytest import pytest
import requests
import yaml import yaml
from llama_stack_client import LlamaStackClient from llama_stack_client import LlamaStackClient
from openai import OpenAI from openai import OpenAI
@ -17,6 +21,60 @@ from llama_stack import LlamaStackAsLibraryClient
from llama_stack.distribution.stack import run_config_from_adhoc_config_spec from llama_stack.distribution.stack import run_config_from_adhoc_config_spec
from llama_stack.env import get_env_or_fail from llama_stack.env import get_env_or_fail
# Fallback port for the `server:<config>` stack-config form; used only when the
# spec carries no explicit port and LLAMA_STACK_PORT is not set in the environment.
DEFAULT_PORT = 8321
def is_port_available(port: int, host: str = "localhost") -> bool:
    """Report whether a TCP socket can currently be bound to ``host:port``.

    A throwaway IPv4 stream socket is created, bound to the address and closed
    again. Any ``OSError`` raised along the way is interpreted as "not available".
    """
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            probe.bind((host, port))
    except OSError:
        available = False
    else:
        available = True
    return available
def start_llama_stack_server(config_name: str) -> subprocess.Popen:
    """Launch ``llama stack run <config_name>`` as a child process.

    Args:
        config_name: Value passed as the final argument to ``llama stack run``.

    Returns:
        The ``subprocess.Popen`` handle of the started process. The caller is
        responsible for polling and terminating it.

    The child inherits the current environment plus
    ``LLAMA_STACK_LOG_FILE=server.log`` (presumably where the server writes its
    log; confirm against the server's logging setup).
    """
    cmd = ["llama", "stack", "run", config_name]
    return subprocess.Popen(
        cmd,
        # Discard output instead of piping it: an undrained pipe can fill up and
        # deadlock the child. subprocess.DEVNULL also avoids leaking a manually
        # opened os.devnull file object that nobody ever closes.
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        text=True,
        env={**os.environ, "LLAMA_STACK_LOG_FILE": "server.log"},
    )
def wait_for_server_ready(base_url: str, timeout: int = 30, process: subprocess.Popen | None = None) -> bool:
"""Wait for the server to be ready by polling the health endpoint."""
health_url = f"{base_url}/v1/health"
start_time = time.time()
while time.time() - start_time < timeout:
if process and process.poll() is not None:
print(f"Server process terminated with return code: {process.returncode}")
return False
try:
response = requests.get(health_url, timeout=5)
if response.status_code == 200:
return True
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
pass
# Print progress every 5 seconds
elapsed = time.time() - start_time
if int(elapsed) % 5 == 0 and elapsed > 0:
print(f"Waiting for server at {base_url}... ({elapsed:.1f}s elapsed)")
time.sleep(0.5)
print(f"Server failed to respond within {timeout} seconds")
return False
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def provider_data(): def provider_data():
@ -122,6 +180,41 @@ def llama_stack_client(request, provider_data):
if not config: if not config:
raise ValueError("You must specify either --stack-config or LLAMA_STACK_CONFIG") raise ValueError("You must specify either --stack-config or LLAMA_STACK_CONFIG")
# Handle server:<config_name> format or server:<config_name>:<port>
if config.startswith("server:"):
parts = config.split(":")
config_name = parts[1]
port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
base_url = f"http://localhost:{port}"
# Check if port is available
if is_port_available(port):
print(f"Starting llama stack server with config '{config_name}' on port {port}...")
# Start server
server_process = start_llama_stack_server(config_name)
# Wait for server to be ready
if not wait_for_server_ready(base_url, timeout=30, process=server_process):
print("Server failed to start within timeout")
server_process.terminate()
raise RuntimeError(
f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid. "
f"See server.log for details."
)
print(f"Server is ready at {base_url}")
# Store process for potential cleanup (pytest will handle termination at session end)
request.session._llama_stack_server_process = server_process
else:
print(f"Port {port} is already in use, assuming server is already running...")
return LlamaStackClient(
base_url=base_url,
provider_data=provider_data,
)
# check if this looks like a URL # check if this looks like a URL
if config.startswith("http") or "//" in config: if config.startswith("http") or "//" in config:
return LlamaStackClient( return LlamaStackClient(
@ -151,3 +244,31 @@ def llama_stack_client(request, provider_data):
def openai_client(client_with_models): def openai_client(client_with_models):
base_url = f"{client_with_models.base_url}/v1/openai/v1" base_url = f"{client_with_models.base_url}/v1/openai/v1"
return OpenAI(base_url=base_url, api_key="fake") return OpenAI(base_url=base_url, api_key="fake")
@pytest.fixture(scope="session", autouse=True)
def cleanup_server_process(request):
    """Stop the llama stack server recorded on the pytest session, if any.

    Runs automatically once per session. After all tests have finished it looks
    for ``request.session._llama_stack_server_process``; a still-running process
    is terminated, and killed if it has not exited within 10 seconds. Errors
    during cleanup are printed rather than raised.
    """
    yield  # the whole test session runs here; everything below is teardown

    session = request.session
    if not hasattr(session, "_llama_stack_server_process"):
        return

    proc = session._llama_stack_server_process
    if not proc:
        print("Server process not found - won't be able to cleanup")
        return

    if proc.poll() is not None:
        print(f"Server process already terminated with return code: {proc.returncode}")
        return

    print("Terminating llama stack server process...")
    try:
        proc.terminate()
        proc.wait(timeout=10)
        print("Server process terminated gracefully")
    except subprocess.TimeoutExpired:
        # terminate() was not honoured in time; escalate to kill().
        print("Server process did not terminate gracefully, killing it")
        proc.kill()
        proc.wait()
        print("Server process killed")
    except Exception as e:
        print(f"Error during server cleanup: {e}")

View file

@ -4,14 +4,14 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import pytest import pytest_asyncio
from llama_stack.distribution.store.registry import CachedDiskDistributionRegistry, DiskDistributionRegistry from llama_stack.distribution.store.registry import CachedDiskDistributionRegistry, DiskDistributionRegistry
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore.sqlite import SqliteKVStoreImpl from llama_stack.providers.utils.kvstore.sqlite import SqliteKVStoreImpl
@pytest.fixture(scope="function") @pytest_asyncio.fixture(scope="function")
async def sqlite_kvstore(tmp_path): async def sqlite_kvstore(tmp_path):
db_path = tmp_path / "test_kv.db" db_path = tmp_path / "test_kv.db"
kvstore_config = SqliteKVStoreConfig(db_path=db_path.as_posix()) kvstore_config = SqliteKVStoreConfig(db_path=db_path.as_posix())
@ -20,14 +20,14 @@ async def sqlite_kvstore(tmp_path):
yield kvstore yield kvstore
@pytest.fixture(scope="function") @pytest_asyncio.fixture(scope="function")
async def disk_dist_registry(sqlite_kvstore): async def disk_dist_registry(sqlite_kvstore):
registry = DiskDistributionRegistry(sqlite_kvstore) registry = DiskDistributionRegistry(sqlite_kvstore)
await registry.initialize() await registry.initialize()
yield registry yield registry
@pytest.fixture(scope="function") @pytest_asyncio.fixture(scope="function")
async def cached_disk_dist_registry(sqlite_kvstore): async def cached_disk_dist_registry(sqlite_kvstore):
registry = CachedDiskDistributionRegistry(sqlite_kvstore) registry = CachedDiskDistributionRegistry(sqlite_kvstore)
await registry.initialize() await registry.initialize()

View file

@ -9,6 +9,7 @@ from datetime import datetime
from unittest.mock import patch from unittest.mock import patch
import pytest import pytest
import pytest_asyncio
from llama_stack.apis.agents import Turn from llama_stack.apis.agents import Turn
from llama_stack.apis.inference import CompletionMessage, StopReason from llama_stack.apis.inference import CompletionMessage, StopReason
@ -16,7 +17,7 @@ from llama_stack.distribution.datatypes import User
from llama_stack.providers.inline.agents.meta_reference.persistence import AgentPersistence, AgentSessionInfo from llama_stack.providers.inline.agents.meta_reference.persistence import AgentPersistence, AgentSessionInfo
@pytest.fixture @pytest_asyncio.fixture
async def test_setup(sqlite_kvstore): async def test_setup(sqlite_kvstore):
agent_persistence = AgentPersistence(agent_id="test_agent", kvstore=sqlite_kvstore, policy={}) agent_persistence = AgentPersistence(agent_id="test_agent", kvstore=sqlite_kvstore, policy={})
yield agent_persistence yield agent_persistence

View file

@ -148,7 +148,7 @@ async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks, embedding_dime
assert len(chunk_ids) == len(set(chunk_ids)), "Duplicate chunk IDs detected across batches!" assert len(chunk_ids) == len(set(chunk_ids)), "Duplicate chunk IDs detected across batches!"
@pytest.fixture(scope="session") @pytest_asyncio.fixture(scope="session")
async def sqlite_vec_adapter(sqlite_connection): async def sqlite_vec_adapter(sqlite_connection):
config = type("Config", (object,), {"db_path": ":memory:"}) # Mock config with in-memory database config = type("Config", (object,), {"db_path": ":memory:"}) # Mock config with in-memory database
adapter = SQLiteVecVectorIOAdapter(config=config, inference_api=None) adapter = SQLiteVecVectorIOAdapter(config=config, inference_api=None)

View file

@ -7,6 +7,7 @@
from unittest.mock import MagicMock, Mock, patch from unittest.mock import MagicMock, Mock, patch
import pytest import pytest
import pytest_asyncio
import yaml import yaml
from pydantic import TypeAdapter, ValidationError from pydantic import TypeAdapter, ValidationError
@ -26,7 +27,7 @@ def _return_model(model):
return model return model
@pytest.fixture @pytest_asyncio.fixture
async def test_setup(cached_disk_dist_registry): async def test_setup(cached_disk_dist_registry):
mock_inference = Mock() mock_inference = Mock()
mock_inference.__provider_spec__ = MagicMock() mock_inference.__provider_spec__ = MagicMock()
@ -245,7 +246,7 @@ async def test_automatic_access_attributes(mock_get_authenticated_user, test_set
assert model.identifier == "auto-access-model" assert model.identifier == "auto-access-model"
@pytest.fixture @pytest_asyncio.fixture
async def test_setup_with_access_policy(cached_disk_dist_registry): async def test_setup_with_access_policy(cached_disk_dist_registry):
mock_inference = Mock() mock_inference = Mock()
mock_inference.__provider_spec__ = MagicMock() mock_inference.__provider_spec__ = MagicMock()

View file

@ -0,0 +1,187 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from unittest.mock import Mock
from fastapi import HTTPException
from openai import BadRequestError
from pydantic import ValidationError
from llama_stack.distribution.access_control.access_control import AccessDeniedError
from llama_stack.distribution.datatypes import AuthenticationRequiredError
from llama_stack.distribution.server.server import translate_exception
class TestTranslateException:
    """Checks of the HTTP status and detail that translate_exception produces."""

    @staticmethod
    def _translated(exc, expected_status):
        """Translate *exc*, assert it became an HTTPException with *expected_status*, and return it."""
        http_exc = translate_exception(exc)
        assert isinstance(http_exc, HTTPException)
        assert http_exc.status_code == expected_status
        return http_exc

    def test_translate_access_denied_error(self):
        """An AccessDeniedError without arguments maps to 403 with the default message."""
        http_exc = self._translated(AccessDeniedError(), 403)
        assert http_exc.detail == "Permission denied: Insufficient permissions"

    def test_translate_access_denied_error_with_context(self):
        """An AccessDeniedError built with action, resource and user reports all of them."""
        from llama_stack.distribution.datatypes import User

        class MockResource:
            """Minimal stand-in exposing the type/identifier/owner attributes of a protected resource."""

            def __init__(self, resource_type: str, resource_id: str, owner=None):
                self.type = resource_type
                self.identifier = resource_id
                self.owner = owner

        user = User("test-user", {"roles": ["user"], "teams": ["dev"]})
        resource = MockResource("vector_db", "test-db")

        http_exc = self._translated(AccessDeniedError("create", resource, user), 403)

        detail = http_exc.detail
        assert "test-user" in detail
        assert "vector_db::test-db" in detail
        assert "create" in detail
        assert "roles=['user']" in detail
        assert "teams=['dev']" in detail

    def test_translate_permission_error(self):
        """PermissionError maps to 403."""
        http_exc = self._translated(PermissionError("Permission denied"), 403)
        assert http_exc.detail == "Permission denied: Permission denied"

    def test_translate_value_error(self):
        """ValueError maps to 400."""
        http_exc = self._translated(ValueError("Invalid input"), 400)
        assert http_exc.detail == "Invalid value: Invalid input"

    def test_translate_bad_request_error(self):
        """openai.BadRequestError maps to 400 and keeps its message as the detail."""
        # BadRequestError requires a response object; a Mock with the two
        # attributes set below is sufficient here.
        fake_response = Mock()
        fake_response.status_code = 400
        fake_response.headers = {}

        exc = BadRequestError("Bad request", response=fake_response, body="Bad request")
        http_exc = self._translated(exc, 400)
        assert http_exc.detail == "Bad request"

    def test_translate_authentication_required_error(self):
        """AuthenticationRequiredError maps to 401."""
        http_exc = self._translated(AuthenticationRequiredError("Authentication required"), 401)
        assert http_exc.detail == "Authentication required: Authentication required"

    def test_translate_timeout_error(self):
        """TimeoutError with a message maps to 504."""
        http_exc = self._translated(TimeoutError("Operation timed out"), 504)
        assert http_exc.detail == "Operation timed out: Operation timed out"

    def test_translate_asyncio_timeout_error(self):
        """A message-less TimeoutError maps to 504 with an empty message part.

        asyncio.TimeoutError is an alias of the builtin TimeoutError on
        Python 3.11+, so the builtin is instantiated here.
        """
        http_exc = self._translated(TimeoutError(), 504)
        assert http_exc.detail == "Operation timed out: "

    def test_translate_not_implemented_error(self):
        """NotImplementedError maps to 501."""
        http_exc = self._translated(NotImplementedError("Not implemented"), 501)
        assert http_exc.detail == "Not implemented: Not implemented"

    def test_translate_validation_error(self):
        """pydantic ValidationError maps to 400 with a structured ``errors`` list."""
        line_errors = [
            {
                "loc": ("field", "nested"),
                "msg": "field required",
                "type": "missing",
            }
        ]
        exc = ValidationError.from_exception_data("TestModel", line_errors)

        http_exc = self._translated(exc, 400)

        assert "errors" in http_exc.detail
        reported = http_exc.detail["errors"]
        assert len(reported) == 1
        first = reported[0]
        assert first["loc"] == ["field", "nested"]
        assert first["msg"] == "Field required"
        assert first["type"] == "missing"

    def test_translate_generic_exception(self):
        """A plain Exception maps to 500 with a fixed, non-leaking message."""
        http_exc = self._translated(Exception("Unexpected error"), 500)
        assert http_exc.detail == "Internal server error: An unexpected error occurred."

    def test_translate_runtime_error(self):
        """RuntimeError maps to 500 with the same fixed message."""
        http_exc = self._translated(RuntimeError("Runtime error"), 500)
        assert http_exc.detail == "Internal server error: An unexpected error occurred."

    def test_multiple_access_denied_scenarios(self):
        """Several permission-style exceptions all map to 403 with their own detail."""
        scenarios = [
            # AccessDeniedError without context uses its default message.
            (AccessDeniedError(), "Permission denied: Insufficient permissions"),
            # PermissionError passes its own message through.
            (PermissionError("No permission"), "Permission denied: No permission"),
            (PermissionError("Access denied"), "Permission denied: Access denied"),
        ]
        for exc, expected_detail in scenarios:
            http_exc = self._translated(exc, 403)
            assert http_exc.detail == expected_detail