diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 32e221128..0dc7a9889 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -25,7 +25,7 @@ jobs: # Listing tests manually since some of them currently fail # TODO: generate matrix list from tests/integration when fixed test-type: [agents, inference, datasets, inspect, scoring, post_training, providers, tool_runtime, vector_io] - client-type: [library, http] + client-type: [library, server] python-version: ["3.12", "3.13"] fail-fast: false # we want to run all tests regardless of failure @@ -45,39 +45,6 @@ jobs: run: | uv run llama stack build --template ollama --image-type venv - - name: Start Llama Stack server in background - if: matrix.client-type == 'http' - env: - INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" - run: | - LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv --env OLLAMA_URL="http://0.0.0.0:11434" & - - - name: Wait for Llama Stack server to be ready - if: matrix.client-type == 'http' - run: | - echo "Waiting for Llama Stack server..." - for i in {1..30}; do - if curl -s http://localhost:8321/v1/health | grep -q "OK"; then - echo "Llama Stack server is up!" - exit 0 - fi - sleep 1 - done - echo "Llama Stack server failed to start" - cat server.log - exit 1 - - - name: Verify Ollama status is OK - if: matrix.client-type == 'http' - run: | - echo "Verifying Ollama status..." - ollama_status=$(curl -s -L http://127.0.0.1:8321/v1/providers/ollama|jq --raw-output .health.status) - echo "Ollama status: $ollama_status" - if [ "$ollama_status" != "OK" ]; then - echo "Ollama health check failed" - exit 1 - fi - - name: Check Storage and Memory Available Before Tests if: ${{ always() }} run: | @@ -92,12 +59,14 @@ jobs: if [ "${{ matrix.client-type }}" == "library" ]; then stack_config="ollama" else - stack_config="http://localhost:8321" + stack_config="server:ollama" fi uv run pytest -s -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \ -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \ --text-model="meta-llama/Llama-3.2-3B-Instruct" \ - --embedding-model=all-MiniLM-L6-v2 + --embedding-model=all-MiniLM-L6-v2 \ + --color=yes \ + --capture=tee-sys | tee pytest-${{ matrix.test-type }}.log - name: Check Storage and Memory Available After Tests if: ${{ always() }} diff --git a/README.md b/README.md index 7f34c3340..3b5358ec2 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,8 @@ pip install llama-stack-client ### CLI ```bash # Run a chat completion +MODEL="Llama-4-Scout-17B-16E-Instruct" + llama-stack-client --endpoint http://localhost:8321 \ inference chat-completion \ --model-id meta-llama/$MODEL \ @@ -106,46 +108,59 @@ By reducing friction and complexity, Llama Stack empowers developers to focus on ### API Providers Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack. 
+Please check out the [full list](https://llama-stack.readthedocs.io/en/latest/providers/index.html). -| **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** | **Post Training** | -|:------------------------:|:----------------------:|:----------:|:-------------:|:----------:|:----------:|:-------------:|:-----------------:| -| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | | -| SambaNova | Hosted | | ✅ | | ✅ | | | -| Cerebras | Hosted | | ✅ | | | | | -| Fireworks | Hosted | ✅ | ✅ | ✅ | | | | -| AWS Bedrock | Hosted | | ✅ | | ✅ | | | -| Together | Hosted | ✅ | ✅ | | ✅ | | | -| Groq | Hosted | | ✅ | | | | | -| Ollama | Single Node | | ✅ | | | | | -| TGI | Hosted and Single Node | | ✅ | | | | | -| NVIDIA NIM | Hosted and Single Node | | ✅ | | | | | -| Chroma | Single Node | | | ✅ | | | | -| PG Vector | Single Node | | | ✅ | | | | -| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | -| vLLM | Hosted and Single Node | | ✅ | | | | | -| OpenAI | Hosted | | ✅ | | | | | -| Anthropic | Hosted | | ✅ | | | | | -| Gemini | Hosted | | ✅ | | | | | -| watsonx | Hosted | | ✅ | | | | | -| HuggingFace | Single Node | | | | | | ✅ | -| TorchTune | Single Node | | | | | | ✅ | -| NVIDIA NEMO | Hosted | | | | | | ✅ | +| API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Telemetry | Post Training | Eval | DatasetIO | +|:-------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:--------:| +| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| SambaNova | Hosted | | ✅ | | ✅ | | | | | +| Cerebras | Hosted | | ✅ | | | | | | | +| Fireworks | Hosted | ✅ | ✅ | ✅ | | | | | | +| AWS Bedrock | Hosted | | ✅ | | ✅ | | | | | +| Together | Hosted | ✅ | ✅ | | ✅ | | | | | +| Groq | Hosted | | ✅ | | | | | | | +| Ollama | Single Node | | ✅ | | | | | | | +| TGI | Hosted/Single Node | | ✅ | | | | | | | +| NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | | | +| ChromaDB | Hosted/Single Node | | | ✅ | | | | | | +| PG Vector | Single Node | | | ✅ | | | | | | +| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | | | +| vLLM | Single Node | | ✅ | | | | | | | +| OpenAI | Hosted | | ✅ | | | | | | | +| Anthropic | Hosted | | ✅ | | | | | | | +| Gemini | Hosted | | ✅ | | | | | | | +| WatsonX | Hosted | | ✅ | | | | | | | +| HuggingFace | Single Node | | | | | | ✅ | | ✅ | +| TorchTune | Single Node | | | | | | ✅ | | | +| NVIDIA NEMO | Hosted | | ✅ | ✅ | | | ✅ | ✅ | ✅ | +| NVIDIA | Hosted | | | | | | ✅ | ✅ | ✅ | +> **Note**: Additional providers are available through external packages. See the [External Providers](https://llama-stack.readthedocs.io/en/latest/providers/external.html) documentation. ### Distributions -A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (eg. ollama) and seamlessly transition to production (eg. Fireworks) without changing your application code. Here are some of the distributions we support: +A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (e.g., ollama) and seamlessly transition to production (e.g., Fireworks) without changing your application code.
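For example, that transition could look something like this (a minimal sketch: the `ollama` build command mirrors the one used in this repository's CI workflow, and the `fireworks` template name is assumed from the distributions table below):

```bash
# Local development: build and run the Ollama-backed distribution
llama stack build --template ollama --image-type venv
llama stack run ollama

# Production: rebuild against the Fireworks-backed distribution.
# Client code pointing at the server stays unchanged.
llama stack build --template fireworks --image-type venv
llama stack run fireworks
```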
+Here are some of the distributions we support: | **Distribution** | **Llama Stack Docker** | Start This Distribution | |:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:| | Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-gpu.html) | -| SambaNova | [llamastack/distribution-sambanova](https://hub.docker.com/repository/docker/llamastack/distribution-sambanova/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/sambanova.html) | -| Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) | +| TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/tgi.html) +| vLLM | [llamastack/distribution-remote-vllm](https://hub.docker.com/repository/docker/llamastack/distribution-remote-vllm/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/remote-vllm.html) +| Starter | [llamastack/distribution-starter](https://hub.docker.com/repository/docker/llamastack/distribution-starter/general) | | +| PostgreSQL | [llamastack/distribution-postgres-demo](https://hub.docker.com/repository/docker/llamastack/distribution-postgres-demo/general) | | + + +Here are the ones that are out of support scope but still available from Docker Hub: + +| **Distribution** | **Llama Stack Docker** | Start This Distribution | |:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:| | Ollama | [llamastack/distribution-ollama](https://hub.docker.com/repository/docker/llamastack/distribution-ollama/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/ollama.html) | -| TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/tgi.html) | | Together | [llamastack/distribution-together](https://hub.docker.com/repository/docker/llamastack/distribution-together/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/together.html) | | Fireworks | [llamastack/distribution-fireworks](https://hub.docker.com/repository/docker/llamastack/distribution-fireworks/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/fireworks.html) | -| vLLM | [llamastack/distribution-remote-vllm](https://hub.docker.com/repository/docker/llamastack/distribution-remote-vllm/general) |
[Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/remote-vllm.html) | +| AWS Bedrock | [llamastack/distribution-bedrock](https://hub.docker.com/repository/docker/llamastack/distribution-bedrock/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/bedrock.html) | +| SambaNova | [llamastack/distribution-sambanova](https://hub.docker.com/repository/docker/llamastack/distribution-sambanova/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/sambanova.html) | +| Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) | | | | ### Documentation diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb index cdaf074b8..88878c9be 100644 --- a/docs/getting_started.ipynb +++ b/docs/getting_started.ipynb @@ -17,7 +17,9 @@ "\n", "Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n", "\n", - "In this guide, we will showcase how you can build LLM-powered agentic applications using Llama Stack.\n" + "In this guide, we will showcase how you can build LLM-powered agentic applications using Llama Stack.\n", + "\n", + "**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. It provides a streamlined experience for getting up and running in just a few steps.\n" ] }, { diff --git a/docs/getting_started_llama4.ipynb b/docs/getting_started_llama4.ipynb index d489b5d06..edefda28c 100644 --- a/docs/getting_started_llama4.ipynb +++ b/docs/getting_started_llama4.ipynb @@ -17,7 +17,9 @@ "\n", "Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n", "\n", - "In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n" + "In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n", + "\n", + "**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. It provides a streamlined experience for getting up and running in just a few steps.\n" ] }, { diff --git a/docs/getting_started_llama_api.ipynb b/docs/getting_started_llama_api.ipynb index 128e9114a..e6c74986b 100644 --- a/docs/getting_started_llama_api.ipynb +++ b/docs/getting_started_llama_api.ipynb @@ -17,7 +17,9 @@ "\n", "Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n", "\n", - "In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n" + "In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n", + "\n", + "**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. 
It provides a streamlined experience for getting up and running in just a few steps.\n" ] }, { diff --git a/docs/quick_start.ipynb b/docs/quick_start.ipynb new file mode 100644 index 000000000..4ae1dbe8d --- /dev/null +++ b/docs/quick_start.ipynb @@ -0,0 +1,367 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c1e7571c", + "metadata": { + "id": "c1e7571c" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)\n", + "\n", + "# Llama Stack - Building AI Applications\n", + "\n", + "\"drawing\"\n", + "\n", + "Get started with Llama Stack in minutes!\n", + "\n", + "[Llama Stack](https://github.com/meta-llama/llama-stack) is a stateful service with REST APIs to support the seamless transition of AI applications across different environments. You can build and test using a local server first and deploy to a hosted endpoint for production.\n", + "\n", + "In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/)\n", + "as the inference [provider](docs/source/providers/index.md#inference) for a Llama Model.\n" + ] + }, + { + "cell_type": "markdown", + "id": "4CV1Q19BDMVw", + "metadata": { + "id": "4CV1Q19BDMVw" + }, + "source": [ + "## Step 1: Install and setup" + ] + }, + { + "cell_type": "markdown", + "id": "K4AvfUAJZOeS", + "metadata": { + "id": "K4AvfUAJZOeS" + }, + "source": [ + "### 1.1. Install uv and test inference with Ollama\n", + "\n", + "We'll install [uv](https://docs.astral.sh/uv/) to set up the Python virtual environment, along with [colab-xterm](https://github.com/InfuseAI/colab-xterm) for running command-line tools, and [Ollama](https://ollama.com/download) as the inference provider." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a2d7b85", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install uv llama_stack llama-stack-client\n", + "\n", + "## If running on Colab:\n", + "# !pip install colab-xterm\n", + "# %load_ext colabxterm\n", + "\n", + "!curl https://ollama.ai/install.sh | sh" + ] + }, + { + "cell_type": "markdown", + "id": "39fa584b", + "metadata": {}, + "source": [ + "### 1.2. Test inference with Ollama" + ] + }, + { + "cell_type": "markdown", + "id": "3bf81522", + "metadata": {}, + "source": [ + "We’ll now launch a terminal and run inference on a Llama model with Ollama to verify that the model is working correctly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7e8e0f1", + "metadata": {}, + "outputs": [], + "source": [ + "## If running on Colab:\n", + "# %xterm\n", + "\n", + "## To be run in the terminal:\n", + "# ollama serve &\n", + "# ollama run llama3.2:3b --keepalive 60m" + ] + }, + { + "cell_type": "markdown", + "id": "f3c5f243", + "metadata": {}, + "source": [ + "If successful, you should see the model respond to a prompt.\n", + "\n", + "...\n", + "```\n", + ">>> hi\n", + "Hello! How can I assist you today?\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "oDUB7M_qe-Gs", + "metadata": { + "id": "oDUB7M_qe-Gs" + }, + "source": [ + "## Step 2: Run the Llama Stack server\n", + "\n", + "In this showcase, we will start a Llama Stack server running locally." + ] + }, + { + "cell_type": "markdown", + "id": "732eadc6", + "metadata": {}, + "source": [ + "### 2.1. 
Set up the Llama Stack Server" ] }, { + "cell_type": "code", + "execution_count": null, + "id": "J2kGed0R5PSf", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "collapsed": true, + "id": "J2kGed0R5PSf", + "outputId": "2478ea60-8d35-48a1-b011-f233831740c5" + }, + "outputs": [], + "source": [ + "import os \n", + "import subprocess\n", + "\n", + "if \"UV_SYSTEM_PYTHON\" in os.environ:\n", + " del os.environ[\"UV_SYSTEM_PYTHON\"]\n", + "\n", + "# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n", + "!uv run --with llama-stack llama stack build --template ollama --image-type venv --image-name myvenv\n", + "\n", + "def run_llama_stack_server_background():\n", + " log_file = open(\"llama_stack_server.log\", \"w\")\n", + " process = subprocess.Popen(\n", + " f\"uv run --with llama-stack llama stack run ollama --image-type venv --image-name myvenv --env INFERENCE_MODEL=llama3.2:3b\",\n", + " shell=True,\n", + " stdout=log_file,\n", + " stderr=log_file,\n", + " text=True\n", + " )\n", + " \n", + " print(f\"Starting Llama Stack server with PID: {process.pid}\")\n", + " return process\n", + "\n", + "def wait_for_server_to_start():\n", + " import requests\n", + " from requests.exceptions import ConnectionError\n", + " import time\n", + " \n", + " url = \"http://0.0.0.0:8321/v1/health\"\n", + " max_retries = 30\n", + " retry_interval = 1\n", + " \n", + " print(\"Waiting for server to start\", end=\"\")\n", + " for _ in range(max_retries):\n", + " try:\n", + " response = requests.get(url)\n", + " if response.status_code == 200:\n", + " print(\"\\nServer is ready!\")\n", + " return True\n", + " except ConnectionError:\n", + " print(\".\", end=\"\", flush=True)\n", + " time.sleep(retry_interval)\n", + " \n", + " print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n", + " return False\n", + "\n", + "\n", + "# use this helper if needed to kill the server \n", + "def kill_llama_stack_server():\n", + " # Kill any existing llama stack server processes\n", + " os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "c40e9efd", + "metadata": {}, + "source": [ + "### 2.2. 
Start the Llama Stack Server" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f779283d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting Llama Stack server with PID: 787100\n", + "Waiting for server to start\n", + "Server is ready!\n" + ] + } + ], + "source": [ + "server_process = run_llama_stack_server_background()\n", + "assert wait_for_server_to_start()" + ] + }, + { + "cell_type": "markdown", + "id": "28477c03", + "metadata": {}, + "source": [ + "## Step 3: Run the demo" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7da71011", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "rag_tool> Ingesting document: https://www.paulgraham.com/greatwork.html\n", + "prompt> How do you do great work?\n", + "\u001b[33minference> \u001b[0m\u001b[33m[k\u001b[0m\u001b[33mnowledge\u001b[0m\u001b[33m_search\u001b[0m\u001b[33m(query\u001b[0m\u001b[33m=\"\u001b[0m\u001b[33mWhat\u001b[0m\u001b[33m is\u001b[0m\u001b[33m the\u001b[0m\u001b[33m key\u001b[0m\u001b[33m to\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m\")]\u001b[0m\u001b[97m\u001b[0m\n", + "\u001b[32mtool_execution> Tool:knowledge_search Args:{'query': 'What is the key to doing great work'}\u001b[0m\n", + "\u001b[32mtool_execution> Tool:knowledge_search Response:[TextContentItem(text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n', type='text'), TextContentItem(text=\"Result 1:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 2:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 3:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 4:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 5:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text='END of knowledge_search tool results.\\n', type='text'), TextContentItem(text='The above results were retrieved to help answer the user\\'s query: \"What is the key to doing great work\". 
Use them as supporting information only in answering this query.\\n', type='text')]\u001b[0m\n", + "\u001b[33minference> \u001b[0m\u001b[33mDoing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m means\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m something\u001b[0m\u001b[33m important\u001b[0m\u001b[33m so\u001b[0m\u001b[33m well\u001b[0m\u001b[33m that\u001b[0m\u001b[33m you\u001b[0m\u001b[33m expand\u001b[0m\u001b[33m people\u001b[0m\u001b[33m's\u001b[0m\u001b[33m ideas\u001b[0m\u001b[33m of\u001b[0m\u001b[33m what\u001b[0m\u001b[33m's\u001b[0m\u001b[33m possible\u001b[0m\u001b[33m.\u001b[0m\u001b[33m However\u001b[0m\u001b[33m,\u001b[0m\u001b[33m there\u001b[0m\u001b[33m's\u001b[0m\u001b[33m no\u001b[0m\u001b[33m threshold\u001b[0m\u001b[33m for\u001b[0m\u001b[33m importance\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m's\u001b[0m\u001b[33m often\u001b[0m\u001b[33m hard\u001b[0m\u001b[33m to\u001b[0m\u001b[33m judge\u001b[0m\u001b[33m at\u001b[0m\u001b[33m the\u001b[0m\u001b[33m time\u001b[0m\u001b[33m anyway\u001b[0m\u001b[33m.\u001b[0m\u001b[33m Great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m matter\u001b[0m\u001b[33m of\u001b[0m\u001b[33m degree\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m can\u001b[0m\u001b[33m be\u001b[0m\u001b[33m difficult\u001b[0m\u001b[33m to\u001b[0m\u001b[33m determine\u001b[0m\u001b[33m whether\u001b[0m\u001b[33m someone\u001b[0m\u001b[33m has\u001b[0m\u001b[33m done\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m until\u001b[0m\u001b[33m after\u001b[0m\u001b[33m the\u001b[0m\u001b[33m fact\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n", + "\u001b[30m\u001b[0m" + ] + } + ], + "source": [ + "from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient\n", + "\n", + "vector_db_id = \"my_demo_vector_db\"\n", + "client = LlamaStackClient(base_url=\"http://0.0.0.0:8321\")\n", + "\n", + "models = client.models.list()\n", + "\n", + "# Select the first LLM and first embedding models\n", + "model_id = next(m for m in models if m.model_type == \"llm\").identifier\n", + "embedding_model_id = (\n", + " em := next(m for m in models if m.model_type == \"embedding\")\n", + ").identifier\n", + "embedding_dimension = em.metadata[\"embedding_dimension\"]\n", + "\n", + "_ = client.vector_dbs.register(\n", + " vector_db_id=vector_db_id,\n", + " embedding_model=embedding_model_id,\n", + " embedding_dimension=embedding_dimension,\n", + " provider_id=\"faiss\",\n", + ")\n", + "source = \"https://www.paulgraham.com/greatwork.html\"\n", + "print(\"rag_tool> Ingesting document:\", source)\n", + "document = RAGDocument(\n", + " document_id=\"document_1\",\n", + " content=source,\n", + " mime_type=\"text/html\",\n", + " metadata={},\n", + ")\n", + "client.tool_runtime.rag_tool.insert(\n", + " documents=[document],\n", + " vector_db_id=vector_db_id,\n", + " chunk_size_in_tokens=50,\n", + ")\n", + "agent = Agent(\n", + " client,\n", + " model=model_id,\n", + " instructions=\"You are a helpful assistant\",\n", + " tools=[\n", + " {\n", + " \"name\": \"builtin::rag/knowledge_search\",\n", + " \"args\": {\"vector_db_ids\": [vector_db_id]},\n", + " }\n", + " ],\n", + ")\n", + "\n", + "prompt = \"How do you do great work?\"\n", + "print(\"prompt>\", prompt)\n", + "\n", + "response = agent.create_turn(\n", + " messages=[{\"role\": \"user\", \"content\": prompt}],\n", + " 
session_id=agent.create_session(\"rag_session\"),\n", + " stream=True,\n", + ")\n", + "\n", + "for log in AgentEventLogger().log(response):\n", + " log.print()" + ] + }, + { + "cell_type": "markdown", + "id": "341aaadf", + "metadata": {}, + "source": [ + "Congratulations! You've successfully built your first RAG application using Llama Stack! 🎉🥳" + ] + }, + { + "cell_type": "markdown", + "id": "e88e1185", + "metadata": {}, + "source": [ + "## Next Steps" + ] + }, + { + "cell_type": "markdown", + "id": "bcb73600", + "metadata": {}, + "source": [ + "Now you're ready to dive deeper into Llama Stack!\n", + "- Explore the [Detailed Tutorial](./detailed_tutorial.md).\n", + "- Try the [Getting Started Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb).\n", + "- Browse more [Notebooks on GitHub](https://github.com/meta-llama/llama-stack/tree/main/docs/notebooks).\n", + "- Learn about Llama Stack [Concepts](../concepts/index.md).\n", + "- Discover how to [Build Llama Stacks](../distributions/index.md).\n", + "- Refer to our [References](../references/index.md) for details on the Llama CLI and Python SDK.\n", + "- Check out the [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repository for example applications and tutorials." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md index 0a0ce994f..1bba6677e 100644 --- a/docs/source/distributions/configuration.md +++ b/docs/source/distributions/configuration.md @@ -77,10 +77,10 @@ Next up is the most critical part: the set of providers that the stack will use ```yaml providers: inference: - # provider_id is a string you can choose freely + # provider_id is a string you can choose freely - provider_id: ollama # provider_type is a string that specifies the type of provider. - # in this case, the provider for inference is ollama and it is run remotely (outside of the distribution) + # in this case, the provider for inference is ollama and it runs remotely (outside of the distribution) provider_type: remote::ollama # config is a dictionary that contains the configuration for the provider. # in this case, the configuration is the url of the ollama server @@ -88,7 +88,7 @@ providers: url: ${env.OLLAMA_URL:=http://localhost:11434} ``` A few things to note: -- A _provider instance_ is identified with an (id, type, configuration) triplet. +- A _provider instance_ is identified with an (id, type, config) triplet. - The id is a string you can choose freely. - You can instantiate any number of provider instances of the same type. - The configuration dictionary is provider-specific. 
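To make the multiple-instances point concrete, a hedged sketch of a `run.yaml` fragment declaring two instances of the same provider type (the `provider_id` values and the `OLLAMA_REMOTE_URL` variable below are illustrative, not part of any shipped template):

```yaml
providers:
  inference:
  - provider_id: ollama-local        # freely chosen id
    provider_type: remote::ollama
    config:
      url: ${env.OLLAMA_URL:=http://localhost:11434}
  - provider_id: ollama-remote       # a second instance of the same type
    provider_type: remote::ollama
    config:
      url: ${env.OLLAMA_REMOTE_URL:=http://ollama.example.com:11434}
```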
@@ -187,7 +187,7 @@ The environment variable substitution system is type-safe: ## Resources -Finally, let's look at the `models` section: +Let's look at the `models` section: ```yaml models: @@ -195,8 +195,9 @@ models: model_id: ${env.INFERENCE_MODEL} provider_id: ollama provider_model_id: null + model_type: llm ``` -A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage the clients to always register models before using them, some Stack servers may come up a list of "already known and available" models. +A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage clients to register models before using them, some Stack servers may come up with a list of "already known and available" models. What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`. diff --git a/docs/source/getting_started/index.md b/docs/source/getting_started/index.md index 8382758cc..ea45da1f7 100644 --- a/docs/source/getting_started/index.md +++ b/docs/source/getting_started/index.md @@ -8,6 +8,8 @@ environments. You can build and test using a local server first and deploy to a In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/) as the inference [provider](../providers/inference/index) for a Llama Model. +**💡 Notebook Version:** You can also follow this quickstart guide in a Jupyter notebook format: [quick_start.ipynb](https://github.com/meta-llama/llama-stack/blob/main/docs/quick_start.ipynb) + #### Step 1: Install and setup 1. Install [uv](https://docs.astral.sh/uv/) 2. 
Run inference on a Llama model with [Ollama](https://ollama.com/download) diff --git a/docs/source/index.md b/docs/source/index.md index 1df5e8507..755b228e3 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -73,17 +73,26 @@ A number of "adapters" are available for some popular Inference and Vector Store | OpenAI | Hosted | | Anthropic | Hosted | | Gemini | Hosted | +| WatsonX | Hosted | +**Agents API** +| **Provider** | **Environments** | +| :----: | :----: | +| Meta Reference | Single Node | +| Fireworks | Hosted | +| Together | Hosted | +| PyTorch ExecuTorch | On-device iOS | **Vector IO API** | **Provider** | **Environments** | | :----: | :----: | | FAISS | Single Node | -| SQLite-Vec| Single Node | +| SQLite-Vec | Single Node | | Chroma | Hosted and Single Node | | Milvus | Hosted and Single Node | | Postgres (PGVector) | Hosted and Single Node | | Weaviate | Hosted | +| Qdrant | Hosted and Single Node | **Safety API** | **Provider** | **Environments** | @@ -93,6 +102,30 @@ A number of "adapters" are available for some popular Inference and Vector Store | Code Scanner | Single Node | | AWS Bedrock | Hosted | +**Post Training API** +| **Provider** | **Environments** | +| :----: | :----: | +| Meta Reference | Single Node | +| HuggingFace | Single Node | +| TorchTune | Single Node | +| NVIDIA NEMO | Hosted | + +**Eval API** +| **Provider** | **Environments** | +| :----: | :----: | +| Meta Reference | Single Node | +| NVIDIA NEMO | Hosted | + +**Telemetry API** +| **Provider** | **Environments** | +| :----: | :----: | +| Meta Reference | Single Node | + +**Tool Runtime API** +| **Provider** | **Environments** | +| :----: | :----: | +| Brave Search | Hosted | +| RAG Runtime | Single Node | ```{toctree} :hidden: diff --git a/docs/source/providers/agents/inline_meta-reference.md b/docs/source/providers/agents/inline_meta-reference.md index cfc0c6881..5f64f79e1 100644 --- a/docs/source/providers/agents/inline_meta-reference.md +++ b/docs/source/providers/agents/inline_meta-reference.md @@ -16,7 +16,6 @@ Meta's reference implementation of an agent system that can use tools, access ve ```yaml persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db responses_store: type: sqlite diff --git a/docs/source/providers/datasetio/inline_localfs.md b/docs/source/providers/datasetio/inline_localfs.md index fbe4c40e3..87a0c795c 100644 --- a/docs/source/providers/datasetio/inline_localfs.md +++ b/docs/source/providers/datasetio/inline_localfs.md @@ -15,7 +15,6 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to ```yaml kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db ``` diff --git a/docs/source/providers/datasetio/remote_huggingface.md b/docs/source/providers/datasetio/remote_huggingface.md index e2052602e..3711f7396 100644 --- a/docs/source/providers/datasetio/remote_huggingface.md +++ b/docs/source/providers/datasetio/remote_huggingface.md @@ -15,7 +15,6 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi ```yaml kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db ``` diff --git a/docs/source/providers/eval/inline_meta-reference.md b/docs/source/providers/eval/inline_meta-reference.md index 704741b5a..606883c72 100644 --- a/docs/source/providers/eval/inline_meta-reference.md +++ b/docs/source/providers/eval/inline_meta-reference.md @@ -15,7 +15,6 @@ 
Meta's reference implementation of evaluation tasks with support for multiple la ```yaml kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db ``` diff --git a/docs/source/providers/index.md b/docs/source/providers/index.md index 3ea253685..f804582d7 100644 --- a/docs/source/providers/index.md +++ b/docs/source/providers/index.md @@ -1,9 +1,10 @@ # Providers Overview The goal of Llama Stack is to build an ecosystem where users can easily swap out different implementations for the same API. Examples for these include: -- LLM inference providers (e.g., Ollama, Fireworks, Together, AWS Bedrock, Groq, Cerebras, SambaNova, vLLM, etc.), -- Vector databases (e.g., ChromaDB, Weaviate, Qdrant, Milvus, FAISS, PGVector, SQLite-Vec, etc.), -- Safety providers (e.g., Meta's Llama Guard, AWS Bedrock Guardrails, etc.) +- LLM inference providers (e.g., Meta Reference, Ollama, Fireworks, Together, AWS Bedrock, Groq, Cerebras, SambaNova, vLLM, OpenAI, Anthropic, Gemini, WatsonX, etc.), +- Vector databases (e.g., FAISS, SQLite-Vec, ChromaDB, Weaviate, Qdrant, Milvus, PGVector, etc.), +- Safety providers (e.g., Meta's Llama Guard, Prompt Guard, Code Scanner, AWS Bedrock Guardrails, etc.), +- Tool Runtime providers (e.g., RAG Runtime, Brave Search, etc.) Providers come in two flavors: - **Remote**: the provider runs as a separate service external to the Llama Stack codebase. Llama Stack contains a small amount of adapter code. diff --git a/docs/source/providers/vector_io/inline_faiss.md b/docs/source/providers/vector_io/inline_faiss.md index 2dcf4625b..bcff66f3f 100644 --- a/docs/source/providers/vector_io/inline_faiss.md +++ b/docs/source/providers/vector_io/inline_faiss.md @@ -44,7 +44,6 @@ more details about Faiss in general. ```yaml kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db ``` diff --git a/docs/source/providers/vector_io/inline_meta-reference.md b/docs/source/providers/vector_io/inline_meta-reference.md index c9ca12ff2..0aac445bd 100644 --- a/docs/source/providers/vector_io/inline_meta-reference.md +++ b/docs/source/providers/vector_io/inline_meta-reference.md @@ -15,7 +15,6 @@ Meta's reference implementation of a vector database. ```yaml kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db ``` diff --git a/docs/source/providers/vector_io/inline_milvus.md b/docs/source/providers/vector_io/inline_milvus.md index 8e99d7f95..65c67f3ee 100644 --- a/docs/source/providers/vector_io/inline_milvus.md +++ b/docs/source/providers/vector_io/inline_milvus.md @@ -19,7 +19,6 @@ Please refer to the remote provider documentation. 
db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy}/milvus.db kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_registry.db ``` diff --git a/docs/zero_to_hero_guide/04_Tool_Calling101.ipynb b/docs/zero_to_hero_guide/04_Tool_Calling101.ipynb index de3754b21..00a427b07 100644 --- a/docs/zero_to_hero_guide/04_Tool_Calling101.ipynb +++ b/docs/zero_to_hero_guide/04_Tool_Calling101.ipynb @@ -36,7 +36,7 @@ "from dotenv import load_dotenv\n", "from llama_stack_client import LlamaStackClient\n", "from llama_stack_client.lib.agents.agent import Agent\n", - "from llama_stack_client.lib.agents.custom_tool import CustomTool\n", + "from llama_stack_client.lib.agents.client_tool import ClientTool\n", "from llama_stack_client.lib.agents.event_logger import EventLogger\n", "from llama_stack_client.types import CompletionMessage\n", "from llama_stack_client.types.agent_create_params import AgentConfig\n", @@ -129,7 +129,7 @@ "source": [ "## Step 3: Create a Custom Tool Class\n", "\n", - "Here, we defines the `WebSearchTool` class, which extends `CustomTool` to integrate the Brave Search API with Llama Stack, enabling web search capabilities within AI workflows. The class handles incoming user queries, interacts with the `BraveSearch` class for data retrieval, and formats results for effective response generation." + "Here, we define the `WebSearchTool` class, which extends `ClientTool` to integrate the Brave Search API with Llama Stack, enabling web search capabilities within AI workflows. The class handles incoming user queries, interacts with the `BraveSearch` class for data retrieval, and formats results for effective response generation." ] }, { @@ -139,7 +139,7 @@ "metadata": {}, "outputs": [], "source": [ - "class WebSearchTool(CustomTool):\n", + "class WebSearchTool(ClientTool):\n", " def __init__(self, api_key: str):\n", " self.api_key = api_key\n", " self.engine = BraveSearch(api_key)\n", diff --git a/docs/zero_to_hero_guide/07_Agents101.ipynb b/docs/zero_to_hero_guide/07_Agents101.ipynb index b6df2a4c8..905799946 100644 --- a/docs/zero_to_hero_guide/07_Agents101.ipynb +++ b/docs/zero_to_hero_guide/07_Agents101.ipynb @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -65,7 +65,7 @@ "from dotenv import load_dotenv\n", "\n", "load_dotenv()\n", - "BRAVE_SEARCH_API_KEY = os.environ[\"BRAVE_SEARCH_API_KEY\"]\n" + "TAVILY_SEARCH_API_KEY = os.environ[\"TAVILY_SEARCH_API_KEY\"]\n" ] }, { @@ -110,10 +110,17 @@ "from llama_stack_client import LlamaStackClient\n", "from llama_stack_client.lib.agents.agent import Agent\n", "from llama_stack_client.lib.agents.event_logger import EventLogger\n", + "from llama_stack_client.types import UserMessage\n", + "from typing import cast, Iterator\n", "\n", "\n", "async def agent_example():\n", - " client = LlamaStackClient(base_url=f\"http://{HOST}:{PORT}\")\n", + " client = LlamaStackClient(\n", + " base_url=f\"http://{HOST}:{PORT}\",\n", + " provider_data={\n", + " \"tavily_search_api_key\": TAVILY_SEARCH_API_KEY,\n", + " }\n", + " )\n", " agent = Agent(\n", " client,\n", " model=MODEL_NAME,\n", @@ -123,13 +130,7 @@ " \"type\": \"greedy\",\n", " },\n", " },\n", - " tools=[\n", - " {\n", - " \"type\": \"brave_search\",\n", - " \"engine\": \"brave\",\n", - " \"api_key\": BRAVE_SEARCH_API_KEY,\n", - " }\n", - " ],\n", + " tools=[\"builtin::websearch\"],\n", " )\n", " session_id = agent.create_session(\"test-session\")\n", " print(f\"Created 
session_id={session_id} for Agent({agent.agent_id})\")\n", @@ -142,15 +143,13 @@ " for prompt in user_prompts:\n", " response = agent.create_turn(\n", " messages=[\n", - " {\n", - " \"role\": \"user\",\n", - " \"content\": prompt,\n", - " }\n", + " UserMessage(role=\"user\", content=prompt)\n", " ],\n", " session_id=session_id,\n", + " stream=True,\n", " )\n", "\n", - " async for log in EventLogger().log(response):\n", + " for log in EventLogger().log(cast(Iterator, response)):\n", " log.print()\n", "\n", "\n", diff --git a/docs/zero_to_hero_guide/README.md b/docs/zero_to_hero_guide/README.md index 96f9768de..a891aa343 100644 --- a/docs/zero_to_hero_guide/README.md +++ b/docs/zero_to_hero_guide/README.md @@ -2,9 +2,9 @@ Llama Stack defines and standardizes the set of core building blocks needed to bring generative AI applications to market. These building blocks are presented in the form of interoperable APIs with a broad set of Providers providing their implementations. These building blocks are assembled into Distributions which are easy for developers to get from zero to production. -This guide will walk you through an end-to-end workflow with Llama Stack with Ollama as the inference provider and ChromaDB as the memory provider. Please note the steps for configuring your provider and distribution will vary a little depending on the services you use. However, the user experience will remain universal - this is the power of Llama-Stack. +This guide will walk you through an end-to-end workflow with Llama Stack with Ollama as the inference provider and ChromaDB as the VectorIO provider. Please note the steps for configuring your provider and distribution will vary depending on the services you use. However, the user experience will remain universal - this is the power of Llama-Stack. -If you're looking for more specific topics, we have a [Zero to Hero Guide](#next-steps) that covers everything from Tool Calling to Agents in detail. Feel free to skip to the end to explore the advanced topics you're interested in. +If you're looking for more specific topics, we have a [Zero to Hero Guide](#next-steps) that covers everything from 'Tool Calling' to 'Agents' in detail. Feel free to skip to the end to explore the advanced topics you're interested in. > If you'd prefer not to set up a local server, explore our notebook on [tool calling with the Together API](Tool_Calling101_Using_Together_Llama_Stack_Server.ipynb). This notebook will show you how to leverage together.ai's Llama Stack Server API, allowing you to get started with Llama Stack without the need for a locally built and running server. @@ -26,15 +26,15 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next - Follow instructions based on the OS you are on. For example, if you are on a Mac, download and unzip `Ollama-darwin.zip`. - Run the `Ollama` application. -1. **Download the Ollama CLI**: +2. **Download the Ollama CLI**: Ensure you have the `ollama` command line tool by downloading and installing it from the same website. -1. **Start ollama server**: +3. **Start ollama server**: Open the terminal and run: - ``` + ```bash ollama serve ``` -1. **Run the model**: +4. **Run the model**: Open the terminal and run: ```bash ollama run llama3.2:3b-instruct-fp16 --keepalive -1m @@ -48,9 +48,9 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next ## Install Dependencies and Set Up Environment 1. 
**Create a Conda Environment**: - Create a new Conda environment with Python 3.10: + Create a new Conda environment with Python 3.12: ```bash - conda create -n ollama python=3.10 + conda create -n ollama python=3.12 ``` Activate the environment: ```bash diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py index 0eb53f397..d621e601e 100644 --- a/llama_stack/apis/telemetry/telemetry.py +++ b/llama_stack/apis/telemetry/telemetry.py @@ -101,7 +101,7 @@ class MetricInResponse(BaseModel): # This is a short term solution to allow inference API to return metrics # The ideal way to do this is to have a way for all response types to include metrics -# and all metric events logged to the telemetry API to be inlcuded with the response +# and all metric events logged to the telemetry API to be included with the response # To do this, we will need to augment all response types with a metrics field. # We have hit a blocker from stainless SDK that prevents us from doing this. # The blocker is that if we were to augment the response types that have a data field diff --git a/llama_stack/distribution/access_control/access_control.py b/llama_stack/distribution/access_control/access_control.py index 84d506d8f..075152ce4 100644 --- a/llama_stack/distribution/access_control/access_control.py +++ b/llama_stack/distribution/access_control/access_control.py @@ -106,4 +106,26 @@ def is_action_allowed( class AccessDeniedError(RuntimeError): - pass + def __init__(self, action: str | None = None, resource: ProtectedResource | None = None, user: User | None = None): + self.action = action + self.resource = resource + self.user = user + + message = _build_access_denied_message(action, resource, user) + super().__init__(message) + + +def _build_access_denied_message(action: str | None, resource: ProtectedResource | None, user: User | None) -> str: + """Build detailed error message for access denied scenarios.""" + if action and resource and user: + resource_info = f"{resource.type}::{resource.identifier}" + user_info = f"'{user.principal}'" + if user.attributes: + attrs = ", ".join([f"{k}={v}" for k, v in user.attributes.items()]) + user_info += f" (attributes: {attrs})" + + message = f"User {user_info} cannot perform action '{action}' on resource '{resource_info}'" + else: + message = "Insufficient permissions" + + return message diff --git a/llama_stack/distribution/configure.py b/llama_stack/distribution/configure.py index e58ea0338..35b216b30 100644 --- a/llama_stack/distribution/configure.py +++ b/llama_stack/distribution/configure.py @@ -17,6 +17,7 @@ from llama_stack.distribution.distribution import ( builtin_automatically_routed_apis, get_provider_registry, ) +from llama_stack.distribution.stack import replace_env_vars from llama_stack.distribution.utils.config_dirs import EXTERNAL_PROVIDERS_DIR from llama_stack.distribution.utils.dynamic import instantiate_class_type from llama_stack.distribution.utils.prompt_for_config import prompt_for_config @@ -163,7 +164,7 @@ def upgrade_from_routing_table( def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig: version = config_dict.get("version", None) if version == LLAMA_STACK_RUN_CONFIG_VERSION: - return StackRunConfig(**config_dict) + return StackRunConfig(**replace_env_vars(config_dict)) if "routing_table" in config_dict: logger.info("Upgrading config...") @@ -174,4 +175,4 @@ def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfi if not 
config_dict.get("external_providers_dir", None): config_dict["external_providers_dir"] = EXTERNAL_PROVIDERS_DIR - return StackRunConfig(**config_dict) + return StackRunConfig(**replace_env_vars(config_dict)) diff --git a/llama_stack/distribution/routing_tables/common.py b/llama_stack/distribution/routing_tables/common.py index b79c8a2a8..7f7de32fe 100644 --- a/llama_stack/distribution/routing_tables/common.py +++ b/llama_stack/distribution/routing_tables/common.py @@ -175,8 +175,9 @@ class CommonRoutingTableImpl(RoutingTable): return obj async def unregister_object(self, obj: RoutableObjectWithProvider) -> None: - if not is_action_allowed(self.policy, "delete", obj, get_authenticated_user()): - raise AccessDeniedError() + user = get_authenticated_user() + if not is_action_allowed(self.policy, "delete", obj, user): + raise AccessDeniedError("delete", obj, user) await self.dist_registry.delete(obj.type, obj.identifier) await unregister_object_from_provider(obj, self.impls_by_provider_id[obj.provider_id]) @@ -193,7 +194,7 @@ class CommonRoutingTableImpl(RoutingTable): # If object supports access control but no attributes set, use creator's attributes creator = get_authenticated_user() if not is_action_allowed(self.policy, "create", obj, creator): - raise AccessDeniedError() + raise AccessDeniedError("create", obj, creator) if creator: obj.owner = creator logger.info(f"Setting owner for {obj.type} '{obj.identifier}' to {obj.owner.principal}") diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index 83407a25f..681ab320d 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -9,6 +9,7 @@ import asyncio import functools import inspect import json +import logging import os import ssl import sys @@ -31,6 +32,7 @@ from openai import BadRequestError from pydantic import BaseModel, ValidationError from llama_stack.apis.common.responses import PaginatedResponse +from llama_stack.distribution.access_control.access_control import AccessDeniedError from llama_stack.distribution.datatypes import AuthenticationRequiredError, LoggingConfig, StackRunConfig from llama_stack.distribution.distribution import builtin_automatically_routed_apis from llama_stack.distribution.request_headers import PROVIDER_DATA_VAR, User, request_provider_data_context @@ -116,7 +118,7 @@ def translate_exception(exc: Exception) -> HTTPException | RequestValidationErro return HTTPException(status_code=400, detail=f"Invalid value: {str(exc)}") elif isinstance(exc, BadRequestError): return HTTPException(status_code=400, detail=str(exc)) - elif isinstance(exc, PermissionError): + elif isinstance(exc, PermissionError | AccessDeniedError): return HTTPException(status_code=403, detail=f"Permission denied: {str(exc)}") elif isinstance(exc, asyncio.TimeoutError | TimeoutError): return HTTPException(status_code=504, detail=f"Operation timed out: {str(exc)}") @@ -236,7 +238,10 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable: result.url = route return result except Exception as e: - logger.exception(f"Error executing endpoint {route=} {method=}") + if logger.isEnabledFor(logging.DEBUG): + logger.exception(f"Error executing endpoint {route=} {method=}") + else: + logger.error(f"Error executing endpoint {route=} {method=}: {str(e)}") raise translate_exception(e) from e sig = inspect.signature(func) diff --git a/llama_stack/distribution/store/registry.py b/llama_stack/distribution/store/registry.py index 
0e84854c2..cd7cd9f00 100644 --- a/llama_stack/distribution/store/registry.py +++ b/llama_stack/distribution/store/registry.py @@ -10,11 +10,11 @@ from typing import Protocol import pydantic -from llama_stack.distribution.datatypes import KVStoreConfig, RoutableObjectWithProvider +from llama_stack.distribution.datatypes import RoutableObjectWithProvider from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig logger = get_logger(__name__, category="core") diff --git a/llama_stack/providers/inline/agents/meta_reference/persistence.py b/llama_stack/providers/inline/agents/meta_reference/persistence.py index 717387008..cda535937 100644 --- a/llama_stack/providers/inline/agents/meta_reference/persistence.py +++ b/llama_stack/providers/inline/agents/meta_reference/persistence.py @@ -53,7 +53,7 @@ class AgentPersistence: identifier=name, # should this be qualified in any way? ) if not is_action_allowed(self.policy, "create", session_info, user): - raise AccessDeniedError() + raise AccessDeniedError("create", session_info, user) await self.kvstore.set( key=f"session:{self.agent_id}:{session_id}", diff --git a/llama_stack/providers/registry/agents.py b/llama_stack/providers/registry/agents.py index 834e81b96..6f8c05a67 100644 --- a/llama_stack/providers/registry/agents.py +++ b/llama_stack/providers/registry/agents.py @@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]: "pillow", "pandas", "scikit-learn", + "mcp", ] + kvstore_dependencies(), # TODO make this dynamic based on the kvstore config module="llama_stack.providers.inline.agents.meta_reference", diff --git a/llama_stack/providers/utils/kvstore/config.py b/llama_stack/providers/utils/kvstore/config.py index a45ff4ce8..0219bbebe 100644 --- a/llama_stack/providers/utils/kvstore/config.py +++ b/llama_stack/providers/utils/kvstore/config.py @@ -36,15 +36,14 @@ class RedisKVStoreConfig(CommonConfig): def url(self) -> str: return f"redis://{self.host}:{self.port}" - @property - def pip_packages(self) -> list[str]: + @classmethod + def pip_packages(cls) -> list[str]: return ["redis"] @classmethod def sample_run_config(cls): return { "type": "redis", - "namespace": None, "host": "${env.REDIS_HOST:=localhost}", "port": "${env.REDIS_PORT:=6379}", } @@ -57,15 +56,14 @@ class SqliteKVStoreConfig(CommonConfig): description="File path for the sqlite database", ) - @property - def pip_packages(self) -> list[str]: + @classmethod + def pip_packages(cls) -> list[str]: return ["aiosqlite"] @classmethod def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"): return { "type": "sqlite", - "namespace": None, "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, } @@ -73,7 +71,7 @@ class SqliteKVStoreConfig(CommonConfig): class PostgresKVStoreConfig(CommonConfig): type: Literal[KVStoreType.postgres.value] = KVStoreType.postgres.value host: str = "localhost" - port: str = "5432" + port: int = 5432 db: str = "llamastack" user: str password: str | None = None @@ -83,7 +81,6 @@ class PostgresKVStoreConfig(CommonConfig): def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs): return { "type": "postgres", - "namespace": None, "host": "${env.POSTGRES_HOST:=localhost}", "port": "${env.POSTGRES_PORT:=5432}", 
"db": "${env.POSTGRES_DB:=llamastack}", @@ -108,8 +105,8 @@ class PostgresKVStoreConfig(CommonConfig): raise ValueError("Table name must be less than 63 characters") return v - @property - def pip_packages(self) -> list[str]: + @classmethod + def pip_packages(cls) -> list[str]: return ["psycopg2-binary"] @@ -122,15 +119,14 @@ class MongoDBKVStoreConfig(CommonConfig): password: str | None = None collection_name: str = "llamastack_kvstore" - @property - def pip_packages(self) -> list[str]: + @classmethod + def pip_packages(cls) -> list[str]: return ["pymongo"] @classmethod def sample_run_config(cls, collection_name: str = "llamastack_kvstore"): return { "type": "mongodb", - "namespace": None, "host": "${env.MONGODB_HOST:=localhost}", "port": "${env.MONGODB_PORT:=5432}", "db": "${env.MONGODB_DB}", @@ -144,3 +140,21 @@ KVStoreConfig = Annotated[ RedisKVStoreConfig | SqliteKVStoreConfig | PostgresKVStoreConfig | MongoDBKVStoreConfig, Field(discriminator="type", default=KVStoreType.sqlite.value), ] + + +def get_pip_packages(store_config: dict | KVStoreConfig) -> list[str]: + """Get pip packages for KV store config, handling both dict and object cases.""" + if isinstance(store_config, dict): + store_type = store_config.get("type") + if store_type == "sqlite": + return SqliteKVStoreConfig.pip_packages() + elif store_type == "postgres": + return PostgresKVStoreConfig.pip_packages() + elif store_type == "redis": + return RedisKVStoreConfig.pip_packages() + elif store_type == "mongodb": + return MongoDBKVStoreConfig.pip_packages() + else: + raise ValueError(f"Unknown KV store type: {store_type}") + else: + return store_config.pip_packages() diff --git a/llama_stack/providers/utils/sqlstore/sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlstore.py index d558a2a26..227c5abcd 100644 --- a/llama_stack/providers/utils/sqlstore/sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/sqlstore.py @@ -30,8 +30,8 @@ class SqlAlchemySqlStoreConfig(BaseModel): def engine_str(self) -> str: ... 
# TODO: move this when we have a better way to specify dependencies with internal APIs - @property - def pip_packages(self) -> list[str]: + @classmethod + def pip_packages(cls) -> list[str]: return ["sqlalchemy[asyncio]"] @@ -48,20 +48,20 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig): @classmethod def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"): - return cls( - type="sqlite", - db_path="${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, - ) + return { + "type": "sqlite", + "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, + } - @property - def pip_packages(self) -> list[str]: - return super().pip_packages + ["aiosqlite"] + @classmethod + def pip_packages(cls) -> list[str]: + return super().pip_packages() + ["aiosqlite"] class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): type: Literal["postgres"] = SqlStoreType.postgres.value host: str = "localhost" - port: str = "5432" + port: int = 5432 db: str = "llamastack" user: str password: str | None = None @@ -70,20 +70,20 @@ class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): def engine_str(self) -> str: return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}" - @property - def pip_packages(self) -> list[str]: - return super().pip_packages + ["asyncpg"] + @classmethod + def pip_packages(cls) -> list[str]: + return super().pip_packages() + ["asyncpg"] @classmethod def sample_run_config(cls, **kwargs): - return cls( - type="postgres", - host="${env.POSTGRES_HOST:=localhost}", - port="${env.POSTGRES_PORT:=5432}", - db="${env.POSTGRES_DB:=llamastack}", - user="${env.POSTGRES_USER:=llamastack}", - password="${env.POSTGRES_PASSWORD:=llamastack}", - ) + return { + "type": "postgres", + "host": "${env.POSTGRES_HOST:=localhost}", + "port": "${env.POSTGRES_PORT:=5432}", + "db": "${env.POSTGRES_DB:=llamastack}", + "user": "${env.POSTGRES_USER:=llamastack}", + "password": "${env.POSTGRES_PASSWORD:=llamastack}", + } SqlStoreConfig = Annotated[ @@ -92,6 +92,20 @@ SqlStoreConfig = Annotated[ ] +def get_pip_packages(store_config: dict | SqlStoreConfig) -> list[str]: + """Get pip packages for SQL store config, handling both dict and object cases.""" + if isinstance(store_config, dict): + store_type = store_config.get("type") + if store_type == "sqlite": + return SqliteSqlStoreConfig.pip_packages() + elif store_type == "postgres": + return PostgresSqlStoreConfig.pip_packages() + else: + raise ValueError(f"Unknown SQL store type: {store_type}") + else: + return store_config.pip_packages() + + def sqlstore_impl(config: SqlStoreConfig) -> SqlStore: if config.type in [SqlStoreType.sqlite.value, SqlStoreType.postgres.value]: from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl diff --git a/llama_stack/providers/utils/vector_io/chunk_utils.py b/llama_stack/providers/utils/vector_io/chunk_utils.py index 68cf11cad..2a939bfba 100644 --- a/llama_stack/providers/utils/vector_io/chunk_utils.py +++ b/llama_stack/providers/utils/vector_io/chunk_utils.py @@ -9,6 +9,11 @@ import uuid def generate_chunk_id(document_id: str, chunk_text: str) -> str: - """Generate a unique chunk ID using a hash of document ID and chunk text.""" + """ + Generate a unique chunk ID using a hash of the document ID and chunk text. + + Note: MD5 is used only to calculate an identifier, not for security purposes. + Adding usedforsecurity=False for compatibility with FIPS environments. 
+ """ hash_input = f"{document_id}:{chunk_text}".encode() - return str(uuid.UUID(hashlib.md5(hash_input).hexdigest())) + return str(uuid.UUID(hashlib.md5(hash_input, usedforsecurity=False).hexdigest())) diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml index f12c5bec5..068278c66 100644 --- a/llama_stack/templates/bedrock/run.yaml +++ b/llama_stack/templates/bedrock/run.yaml @@ -21,7 +21,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/faiss_store.db safety: - provider_id: bedrock @@ -33,7 +32,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/agents_store.db responses_store: type: sqlite @@ -51,7 +49,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -59,14 +56,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml index c3877ddce..305e9a20f 100644 --- a/llama_stack/templates/cerebras/run.yaml +++ b/llama_stack/templates/cerebras/run.yaml @@ -31,7 +31,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/faiss_store.db agents: - provider_id: meta-reference @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/agents_store.db responses_store: type: sqlite @@ -50,7 +48,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -58,14 +55,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index a38d09324..5a68af3e6 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -36,7 +36,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db responses_store: type: sqlite @@ -54,7 +53,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -62,14 +60,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/dell/run-with-safety.yaml b/llama_stack/templates/dell/run-with-safety.yaml index 48639c772..1e1ef1ea9 100644 --- a/llama_stack/templates/dell/run-with-safety.yaml +++ b/llama_stack/templates/dell/run-with-safety.yaml @@ -39,7 +39,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db responses_store: type: sqlite @@ -57,7 +56,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +63,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/dell/run.yaml b/llama_stack/templates/dell/run.yaml index 13d43530b..6f5c56dd3 100644 --- a/llama_stack/templates/dell/run.yaml +++ b/llama_stack/templates/dell/run.yaml @@ -35,7 +35,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db responses_store: type: sqlite @@ -53,7 +52,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -61,14 +59,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index ecb53a18d..1233e2271 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -27,7 +27,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db safety: - provider_id: llama-guard @@ -45,7 +44,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db responses_store: type: sqlite @@ -63,7 +61,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -71,14 +68,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index 298d28d52..7f0bc49f5 100644 --- 
a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -27,7 +27,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db safety: - provider_id: llama-guard @@ -40,7 +39,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db responses_store: type: sqlite @@ -58,7 +56,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -66,14 +63,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml index 13bb65ed2..351ca74f7 100644 --- a/llama_stack/templates/groq/run.yaml +++ b/llama_stack/templates/groq/run.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/faiss_store.db safety: - provider_id: llama-guard @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/agents_store.db responses_store: type: sqlite @@ -57,7 +55,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +62,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml index b2bc6a8e9..63063ad91 100644 --- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml +++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml @@ -31,7 +31,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db safety: - provider_id: llama-guard @@ -44,7 +43,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db responses_store: type: sqlite @@ -62,7 +60,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -70,14 +67,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db scoring: - provider_id: basic diff --git 
a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml index d62921ccc..4caf0db04 100644 --- a/llama_stack/templates/hf-endpoint/run.yaml +++ b/llama_stack/templates/hf-endpoint/run.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db safety: - provider_id: llama-guard @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db responses_store: type: sqlite @@ -57,7 +55,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +62,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml index d7ff4f446..a4bba1f76 100644 --- a/llama_stack/templates/hf-serverless/run-with-safety.yaml +++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml @@ -31,7 +31,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db safety: - provider_id: llama-guard @@ -44,7 +43,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db responses_store: type: sqlite @@ -62,7 +60,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -70,14 +67,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml index 19484cba6..23e4c1f28 100644 --- a/llama_stack/templates/hf-serverless/run.yaml +++ b/llama_stack/templates/hf-serverless/run.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db safety: - provider_id: llama-guard @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db responses_store: type: sqlite @@ -57,7 +55,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +62,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db - 
provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/llama_api/run.yaml b/llama_stack/templates/llama_api/run.yaml index 3bfb284a3..77bbcfbc8 100644 --- a/llama_stack/templates/llama_api/run.yaml +++ b/llama_stack/templates/llama_api/run.yaml @@ -48,7 +48,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/agents_store.db responses_store: type: sqlite @@ -66,7 +65,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -74,14 +72,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml index 46b3a33a6..2f5ee4062 100644 --- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml @@ -41,7 +41,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db safety: - provider_id: llama-guard @@ -54,7 +53,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db responses_store: type: sqlite @@ -72,7 +70,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -80,14 +77,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml index 033ec245a..cc119bf4d 100644 --- a/llama_stack/templates/meta-reference-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-gpu/run.yaml @@ -31,7 +31,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db safety: - provider_id: llama-guard @@ -44,7 +43,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db responses_store: type: sqlite @@ -62,7 +60,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -70,14 +67,12 @@ providers: config: kvstore: type: sqlite - namespace: null 
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml index 73783be98..7dcfd196d 100644 --- a/llama_stack/templates/nvidia/run-with-safety.yaml +++ b/llama_stack/templates/nvidia/run-with-safety.yaml @@ -30,7 +30,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db safety: - provider_id: nvidia @@ -44,7 +43,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db responses_store: type: sqlite @@ -75,7 +73,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db - provider_id: nvidia provider_type: remote::nvidia diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index af9d5904a..f69270fb5 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -25,7 +25,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db safety: - provider_id: nvidia @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db responses_store: type: sqlite diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index bad51de09..98db5fc98 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -25,7 +25,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard @@ -40,7 +39,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db responses_store: type: sqlite @@ -58,7 +56,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -66,14 +63,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index e1dea730e..38fb2bace 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -25,7 +25,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard @@ -38,7 +37,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db responses_store: type: sqlite @@ -56,7 +54,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -64,14 +61,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/open-benchmark/run.yaml b/llama_stack/templates/open-benchmark/run.yaml index 57ae6b9be..7b1ef8f10 100644 --- a/llama_stack/templates/open-benchmark/run.yaml +++ b/llama_stack/templates/open-benchmark/run.yaml @@ -62,7 +62,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db responses_store: type: sqlite @@ -80,7 +79,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -88,14 +86,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/passthrough/run-with-safety.yaml b/llama_stack/templates/passthrough/run-with-safety.yaml index 7a30f665c..5cd8a2930 100644 --- a/llama_stack/templates/passthrough/run-with-safety.yaml +++ b/llama_stack/templates/passthrough/run-with-safety.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db safety: - provider_id: llama-guard @@ -44,7 +43,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db responses_store: type: sqlite @@ -62,7 +60,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -70,14 +67,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/passthrough/run.yaml b/llama_stack/templates/passthrough/run.yaml index dc751ea20..5b6078953 100644 --- a/llama_stack/templates/passthrough/run.yaml +++ b/llama_stack/templates/passthrough/run.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db safety: - provider_id: llama-guard @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db responses_store: type: sqlite @@ -57,7 +55,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +62,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/postgres-demo/postgres_demo.py b/llama_stack/templates/postgres-demo/postgres_demo.py index 67ad35db7..ed69c22db 100644 --- a/llama_stack/templates/postgres-demo/postgres_demo.py +++ b/llama_stack/templates/postgres-demo/postgres_demo.py @@ -114,7 +114,7 @@ def get_distribution_template() -> DistributionTemplate: provider_id="meta-reference", provider_type="inline::meta-reference", config=dict( - service_name="${env.OTEL_SERVICE_NAME:=}", + service_name="${env.OTEL_SERVICE_NAME:=\u200b}", sinks="${env.TELEMETRY_SINKS:=console,otel_trace}", otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}", ), diff --git a/llama_stack/templates/postgres-demo/run.yaml b/llama_stack/templates/postgres-demo/run.yaml index dd20cc6ac..2b6b1a64f 100644 --- a/llama_stack/templates/postgres-demo/run.yaml +++ b/llama_stack/templates/postgres-demo/run.yaml @@ -51,7 +51,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:=} + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" sinks: ${env.TELEMETRY_SINKS:=console,otel_trace} otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces} tool_runtime: diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml index 78fb22d38..a8d30904d 100644 --- a/llama_stack/templates/remote-vllm/run-with-safety.yaml +++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml @@ -35,7 +35,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db safety: - provider_id: llama-guard @@ -48,7 +47,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db responses_store: type: sqlite @@ -59,7 +57,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -67,14 +64,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml index 1cc4596f3..58c4f867d 100644 --- a/llama_stack/templates/remote-vllm/run.yaml +++ b/llama_stack/templates/remote-vllm/run.yaml @@ -28,7 +28,6 @@ providers: config: kvstore: type: sqlite - namespace: null 
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db safety: - provider_id: llama-guard @@ -41,7 +40,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db responses_store: type: sqlite @@ -52,7 +50,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -60,14 +57,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml index 6163a58b3..ab6c70ae0 100644 --- a/llama_stack/templates/sambanova/run.yaml +++ b/llama_stack/templates/sambanova/run.yaml @@ -23,7 +23,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/faiss_store.db - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb @@ -49,7 +48,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/agents_store.db responses_store: type: sqlite diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml index 190030690..de8d35683 100644 --- a/llama_stack/templates/starter/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -66,7 +66,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db - provider_id: ${env.ENABLE_SQLITE_VEC:+sqlite-vec} provider_type: inline::sqlite-vec @@ -78,7 +77,6 @@ providers: db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb @@ -111,7 +109,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db responses_store: type: sqlite @@ -129,7 +126,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -137,14 +133,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index 7914d4298..2a982bb62 100644 --- a/llama_stack/templates/starter/starter.py +++ b/llama_stack/templates/starter/starter.py @@ -234,7 +234,6 @@ def get_distribution_template() -> DistributionTemplate: default_models = get_model_registry(available_models) - postgres_store = 
PostgresSqlStoreConfig.sample_run_config() return DistributionTemplate( name=name, distro_type="self_hosted", @@ -243,7 +242,7 @@ def get_distribution_template() -> DistributionTemplate: template_path=None, providers=providers, available_models_by_provider=available_models, - additional_pip_packages=postgres_store.pip_packages, + additional_pip_packages=PostgresSqlStoreConfig.pip_packages(), run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index 7badff140..dceb13c8b 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -15,6 +15,7 @@ from pydantic import BaseModel, Field from llama_stack.apis.datasets import DatasetPurpose from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( + LLAMA_STACK_RUN_CONFIG_VERSION, Api, BenchmarkInput, BuildConfig, @@ -23,14 +24,15 @@ from llama_stack.distribution.datatypes import ( ModelInput, Provider, ShieldInput, - StackRunConfig, ToolGroupInput, ) from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.utils.dynamic import instantiate_class_type from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig +from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore.config import get_pip_packages as get_kv_pip_packages +from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages def get_model_registry( @@ -87,21 +89,24 @@ class RunConfigSettings(BaseModel): default_tool_groups: list[ToolGroupInput] | None = None default_datasets: list[DatasetInput] | None = None default_benchmarks: list[BenchmarkInput] | None = None - metadata_store: KVStoreConfig | None = None - inference_store: SqlStoreConfig | None = None + metadata_store: dict | None = None + inference_store: dict | None = None def run_config( self, name: str, providers: dict[str, list[str]], container_image: str | None = None, - ) -> StackRunConfig: + ) -> dict: provider_registry = get_provider_registry() provider_configs = {} for api_str, provider_types in providers.items(): if api_providers := self.provider_overrides.get(api_str): - provider_configs[api_str] = api_providers + # Convert Provider objects to dicts for YAML serialization + provider_configs[api_str] = [ + p.model_dump(exclude_none=True) if isinstance(p, Provider) else p for p in api_providers + ] continue provider_configs[api_str] = [] @@ -128,33 +133,40 @@ class RunConfigSettings(BaseModel): provider_id=provider_id, provider_type=provider_type, config=config, - ) + ).model_dump(exclude_none=True) ) # Get unique set of APIs from providers apis = sorted(providers.keys()) - return StackRunConfig( - image_name=name, - container_image=container_image, - apis=apis, - providers=provider_configs, - metadata_store=self.metadata_store + # Return a dict that matches StackRunConfig structure + return { + "version": LLAMA_STACK_RUN_CONFIG_VERSION, + "image_name": name, + "container_image": container_image, + "apis": apis, + "providers": provider_configs, + "metadata_store": self.metadata_store or SqliteKVStoreConfig.sample_run_config( 
__distro_dir__=f"~/.llama/distributions/{name}", db_name="registry.db", ), - inference_store=self.inference_store + "inference_store": self.inference_store or SqliteSqlStoreConfig.sample_run_config( __distro_dir__=f"~/.llama/distributions/{name}", db_name="inference_store.db", ), - models=self.default_models or [], - shields=self.default_shields or [], - tool_groups=self.default_tool_groups or [], - datasets=self.default_datasets or [], - benchmarks=self.default_benchmarks or [], - ) + "models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])], + "shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])], + "vector_dbs": [], + "datasets": [d.model_dump(exclude_none=True) for d in (self.default_datasets or [])], + "scoring_fns": [], + "benchmarks": [b.model_dump(exclude_none=True) for b in (self.default_benchmarks or [])], + "tool_groups": [t.model_dump(exclude_none=True) for t in (self.default_tool_groups or [])], + "server": { + "port": 8321, + }, + } class DistributionTemplate(BaseModel): @@ -190,10 +202,12 @@ class DistributionTemplate(BaseModel): # TODO: This is a hack to get the dependencies for internal APIs into build # We should have a better way to do this by formalizing the concept of "internal" APIs # and providers, with a way to specify dependencies for them. - if run_config_.inference_store: - additional_pip_packages.extend(run_config_.inference_store.pip_packages) - if run_config_.metadata_store: - additional_pip_packages.extend(run_config_.metadata_store.pip_packages) + + if run_config_.get("inference_store"): + additional_pip_packages.extend(get_sql_pip_packages(run_config_["inference_store"])) + + if run_config_.get("metadata_store"): + additional_pip_packages.extend(get_kv_pip_packages(run_config_["metadata_store"])) if self.additional_pip_packages: additional_pip_packages.extend(self.additional_pip_packages) @@ -286,7 +300,7 @@ class DistributionTemplate(BaseModel): run_config = settings.run_config(self.name, self.providers, self.container_image) with open(yaml_output_dir / yaml_pth, "w") as f: yaml.safe_dump( - run_config.model_dump(exclude_none=True), + {k: v for k, v in run_config.items() if v is not None}, f, sort_keys=False, ) diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml index c4f9ae7ef..c19b916d5 100644 --- a/llama_stack/templates/tgi/run-with-safety.yaml +++ b/llama_stack/templates/tgi/run-with-safety.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db safety: - provider_id: llama-guard @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db responses_store: type: sqlite @@ -57,7 +55,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +62,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml index 
70e5872b3..f0197d74c 100644 --- a/llama_stack/templates/tgi/run.yaml +++ b/llama_stack/templates/tgi/run.yaml @@ -25,7 +25,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db safety: - provider_id: llama-guard @@ -38,7 +37,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db responses_store: type: sqlite @@ -56,7 +54,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -64,14 +61,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml index 14f423855..b32c9ee8d 100644 --- a/llama_stack/templates/together/run-with-safety.yaml +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db safety: - provider_id: llama-guard @@ -44,7 +43,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db responses_store: type: sqlite @@ -62,7 +60,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -70,14 +67,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index 38f1922c0..22c99f6cf 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db safety: - provider_id: llama-guard @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db responses_store: type: sqlite @@ -57,7 +55,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +62,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db scoring: - provider_id: basic diff --git 
a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml index 6854ad05c..6d122e180 100644 --- a/llama_stack/templates/vllm-gpu/run.yaml +++ b/llama_stack/templates/vllm-gpu/run.yaml @@ -30,7 +30,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/faiss_store.db safety: - provider_id: llama-guard @@ -43,7 +42,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/agents_store.db responses_store: type: sqlite @@ -61,7 +59,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -69,14 +66,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/watsonx/run.yaml b/llama_stack/templates/watsonx/run.yaml index 8b8fc09c4..d80ee6329 100644 --- a/llama_stack/templates/watsonx/run.yaml +++ b/llama_stack/templates/watsonx/run.yaml @@ -27,7 +27,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db safety: - provider_id: llama-guard @@ -40,7 +39,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db responses_store: type: sqlite @@ -58,7 +56,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -66,14 +63,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/tests/integration/README.md b/tests/integration/README.md index 31d58c83f..fc8612139 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -9,7 +9,9 @@ pytest --help ``` Here are the most important options: -- `--stack-config`: specify the stack config to use. You have three ways to point to a stack: +- `--stack-config`: specify the stack config to use. You have four ways to point to a stack: + - **`server:<config>`** - automatically start a server with the given config (e.g., `server:fireworks`). This provides one-step testing by auto-starting the server if the port is available, or reusing an existing server if already running. + - **`server:<config>:<port>`** - same as above but with a custom port (e.g., `server:together:8322`) - a URL which points to a Llama Stack distribution server - a template (e.g., `fireworks`, `together`) or a path to a `run.yaml` file - a comma-separated list of api=provider pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface; see the sketch below.
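As an illustrative sketch (not part of the original README), combining that last option with the usual model flag to exercise only the inference surface might look like this; the model name simply mirrors the examples further down:

```bash
# Ad-hoc single-surface config: only the inference API, backed by fireworks
pytest -s -v tests/integration/inference \
  --stack-config=inference=fireworks \
  --text-model=meta-llama/Llama-3.1-8B-Instruct
```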
@@ -26,12 +28,39 @@ Model parameters can be influenced by the following options: Each of these are comma-separated lists and can be used to generate multiple parameter combinations. Note that tests will be skipped if no model is specified. -Experimental, under development, options: -- `--record-responses`: record new API responses instead of using cached ones - - ## Examples +### Testing against a Server + +Run all text inference tests by auto-starting a server with the `fireworks` config: + +```bash +pytest -s -v tests/integration/inference/test_text_inference.py \ + --stack-config=server:fireworks \ + --text-model=meta-llama/Llama-3.1-8B-Instruct +``` + +Run tests with auto-server startup on a custom port: + +```bash +pytest -s -v tests/integration/inference/ \ + --stack-config=server:together:8322 \ + --text-model=meta-llama/Llama-3.1-8B-Instruct +``` + +Run multiple test suites with auto-server (eliminates manual server management): + +```bash +# Auto-start server and run all integration tests +export FIREWORKS_API_KEY= + +pytest -s -v tests/integration/inference/ tests/integration/safety/ tests/integration/agents/ \ + --stack-config=server:fireworks \ + --text-model=meta-llama/Llama-3.1-8B-Instruct +``` + +### Testing with Library Client + Run all text inference tests with the `together` distribution: ```bash diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index 8b6b3ddbe..ecd29484b 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -6,9 +6,13 @@ import inspect import os +import socket +import subprocess import tempfile +import time import pytest +import requests import yaml from llama_stack_client import LlamaStackClient from openai import OpenAI @@ -17,6 +21,60 @@ from llama_stack import LlamaStackAsLibraryClient from llama_stack.distribution.stack import run_config_from_adhoc_config_spec from llama_stack.env import get_env_or_fail +DEFAULT_PORT = 8321 + + +def is_port_available(port: int, host: str = "localhost") -> bool: + """Check if a port is available for binding.""" + try: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: + sock.bind((host, port)) + return True + except OSError: + return False + + +def start_llama_stack_server(config_name: str) -> subprocess.Popen: + """Start a llama stack server with the given config.""" + cmd = ["llama", "stack", "run", config_name] + devnull = open(os.devnull, "w") + process = subprocess.Popen( + cmd, + stdout=devnull, # redirect stdout to devnull to prevent deadlock + stderr=devnull, # redirect stderr to devnull to prevent deadlock + text=True, + env={**os.environ, "LLAMA_STACK_LOG_FILE": "server.log"}, + ) + return process + + +def wait_for_server_ready(base_url: str, timeout: int = 30, process: subprocess.Popen | None = None) -> bool: + """Wait for the server to be ready by polling the health endpoint.""" + health_url = f"{base_url}/v1/health" + start_time = time.time() + + while time.time() - start_time < timeout: + if process and process.poll() is not None: + print(f"Server process terminated with return code: {process.returncode}") + return False + + try: + response = requests.get(health_url, timeout=5) + if response.status_code == 200: + return True + except (requests.exceptions.ConnectionError, requests.exceptions.Timeout): + pass + + # Print progress every 5 seconds + elapsed = time.time() - start_time + if int(elapsed) % 5 == 0 and elapsed > 0: + print(f"Waiting for server at {base_url}... 
({elapsed:.1f}s elapsed)") + + time.sleep(0.5) + + print(f"Server failed to respond within {timeout} seconds") + return False + @pytest.fixture(scope="session") def provider_data(): @@ -122,6 +180,41 @@ def llama_stack_client(request, provider_data): if not config: raise ValueError("You must specify either --stack-config or LLAMA_STACK_CONFIG") + # Handle server:<config_name> format or server:<config_name>:<port> + if config.startswith("server:"): + parts = config.split(":") + config_name = parts[1] + port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT)) + base_url = f"http://localhost:{port}" + + # Check if port is available + if is_port_available(port): + print(f"Starting llama stack server with config '{config_name}' on port {port}...") + + # Start server + server_process = start_llama_stack_server(config_name) + + # Wait for server to be ready + if not wait_for_server_ready(base_url, timeout=30, process=server_process): + print("Server failed to start within timeout") + server_process.terminate() + raise RuntimeError( + f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid. " + f"See server.log for details." + ) + + print(f"Server is ready at {base_url}") + + # Store process for potential cleanup (pytest will handle termination at session end) + request.session._llama_stack_server_process = server_process + else: + print(f"Port {port} is already in use, assuming server is already running...") + + return LlamaStackClient( + base_url=base_url, + provider_data=provider_data, + ) + # check if this looks like a URL if config.startswith("http") or "//" in config: return LlamaStackClient( @@ -151,3 +244,31 @@ def llama_stack_client(request, provider_data): def openai_client(client_with_models): base_url = f"{client_with_models.base_url}/v1/openai/v1" return OpenAI(base_url=base_url, api_key="fake") + + +@pytest.fixture(scope="session", autouse=True) +def cleanup_server_process(request): + """Cleanup server process at the end of the test session.""" + yield  # Run tests + + if hasattr(request.session, "_llama_stack_server_process"): + server_process = request.session._llama_stack_server_process + if server_process: + if server_process.poll() is None: + print("Terminating llama stack server process...") + else: + print(f"Server process already terminated with return code: {server_process.returncode}") + return + try: + server_process.terminate() + server_process.wait(timeout=10) + print("Server process terminated gracefully") + except subprocess.TimeoutExpired: + print("Server process did not terminate gracefully, killing it") + server_process.kill() + server_process.wait() + print("Server process killed") + except Exception as e: + print(f"Error during server cleanup: {e}") + else: + print("Server process not found - won't be able to cleanup") diff --git a/tests/unit/fixtures.py b/tests/unit/fixtures.py index 7174d2e78..4e50c5e08 100644 --- a/tests/unit/fixtures.py +++ b/tests/unit/fixtures.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree.
-import pytest +import pytest_asyncio from llama_stack.distribution.store.registry import CachedDiskDistributionRegistry, DiskDistributionRegistry from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig from llama_stack.providers.utils.kvstore.sqlite import SqliteKVStoreImpl -@pytest.fixture(scope="function") +@pytest_asyncio.fixture(scope="function") async def sqlite_kvstore(tmp_path): db_path = tmp_path / "test_kv.db" kvstore_config = SqliteKVStoreConfig(db_path=db_path.as_posix()) @@ -20,14 +20,14 @@ async def sqlite_kvstore(tmp_path): yield kvstore -@pytest.fixture(scope="function") +@pytest_asyncio.fixture(scope="function") async def disk_dist_registry(sqlite_kvstore): registry = DiskDistributionRegistry(sqlite_kvstore) await registry.initialize() yield registry -@pytest.fixture(scope="function") +@pytest_asyncio.fixture(scope="function") async def cached_disk_dist_registry(sqlite_kvstore): registry = CachedDiskDistributionRegistry(sqlite_kvstore) await registry.initialize() diff --git a/tests/unit/providers/agents/test_persistence_access_control.py b/tests/unit/providers/agents/test_persistence_access_control.py index d5b876a09..656d1e53c 100644 --- a/tests/unit/providers/agents/test_persistence_access_control.py +++ b/tests/unit/providers/agents/test_persistence_access_control.py @@ -9,6 +9,7 @@ from datetime import datetime from unittest.mock import patch import pytest +import pytest_asyncio from llama_stack.apis.agents import Turn from llama_stack.apis.inference import CompletionMessage, StopReason @@ -16,7 +17,7 @@ from llama_stack.distribution.datatypes import User from llama_stack.providers.inline.agents.meta_reference.persistence import AgentPersistence, AgentSessionInfo -@pytest.fixture +@pytest_asyncio.fixture async def test_setup(sqlite_kvstore): agent_persistence = AgentPersistence(agent_id="test_agent", kvstore=sqlite_kvstore, policy={}) yield agent_persistence diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py index bbac717c7..5d9d92cf3 100644 --- a/tests/unit/providers/vector_io/test_sqlite_vec.py +++ b/tests/unit/providers/vector_io/test_sqlite_vec.py @@ -148,7 +148,7 @@ async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks, embedding_dime assert len(chunk_ids) == len(set(chunk_ids)), "Duplicate chunk IDs detected across batches!" 
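# (Editorial sketch, not part of the original diff.) The @pytest_asyncio.fixture
# swaps throughout this diff are needed because a plain @pytest.fixture does not
# drive an async generator fixture: the test would receive the raw async
# generator object rather than the value it yields. A minimal reproduction,
# assuming pytest-asyncio is installed and running in auto mode:
#
#   import pytest_asyncio
#
#   @pytest_asyncio.fixture
#   async def answer():
#       yield 42  # advanced and finalized by pytest-asyncio
#
#   async def test_answer(answer):
#       assert answer == 42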
-@pytest.fixture(scope="session") +@pytest_asyncio.fixture(scope="session") async def sqlite_vec_adapter(sqlite_connection): config = type("Config", (object,), {"db_path": ":memory:"}) # Mock config with in-memory database adapter = SQLiteVecVectorIOAdapter(config=config, inference_api=None) diff --git a/tests/unit/server/test_access_control.py b/tests/unit/server/test_access_control.py index f9ad47b0c..af03ddacb 100644 --- a/tests/unit/server/test_access_control.py +++ b/tests/unit/server/test_access_control.py @@ -7,6 +7,7 @@ from unittest.mock import MagicMock, Mock, patch import pytest +import pytest_asyncio import yaml from pydantic import TypeAdapter, ValidationError @@ -26,7 +27,7 @@ def _return_model(model): return model -@pytest.fixture +@pytest_asyncio.fixture async def test_setup(cached_disk_dist_registry): mock_inference = Mock() mock_inference.__provider_spec__ = MagicMock() @@ -245,7 +246,7 @@ async def test_automatic_access_attributes(mock_get_authenticated_user, test_set assert model.identifier == "auto-access-model" -@pytest.fixture +@pytest_asyncio.fixture async def test_setup_with_access_policy(cached_disk_dist_registry): mock_inference = Mock() mock_inference.__provider_spec__ = MagicMock() diff --git a/tests/unit/server/test_server.py b/tests/unit/server/test_server.py new file mode 100644 index 000000000..d17d58b8a --- /dev/null +++ b/tests/unit/server/test_server.py @@ -0,0 +1,187 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from unittest.mock import Mock + +from fastapi import HTTPException +from openai import BadRequestError +from pydantic import ValidationError + +from llama_stack.distribution.access_control.access_control import AccessDeniedError +from llama_stack.distribution.datatypes import AuthenticationRequiredError +from llama_stack.distribution.server.server import translate_exception + + +class TestTranslateException: + """Test cases for the translate_exception function.""" + + def test_translate_access_denied_error(self): + """Test that AccessDeniedError is translated to 403 HTTP status.""" + exc = AccessDeniedError() + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 403 + assert result.detail == "Permission denied: Insufficient permissions" + + def test_translate_access_denied_error_with_context(self): + """Test that AccessDeniedError with context includes detailed information.""" + from llama_stack.distribution.datatypes import User + + # Create mock user and resource + user = User("test-user", {"roles": ["user"], "teams": ["dev"]}) + + # Create a simple mock object that implements the ProtectedResource protocol + class MockResource: + def __init__(self, type: str, identifier: str, owner=None): + self.type = type + self.identifier = identifier + self.owner = owner + + resource = MockResource("vector_db", "test-db") + + exc = AccessDeniedError("create", resource, user) + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 403 + assert "test-user" in result.detail + assert "vector_db::test-db" in result.detail + assert "create" in result.detail + assert "roles=['user']" in result.detail + assert "teams=['dev']" in result.detail + + def test_translate_permission_error(self): + """Test that PermissionError is translated to 403 HTTP status.""" + exc = 
PermissionError("Permission denied") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 403 + assert result.detail == "Permission denied: Permission denied" + + def test_translate_value_error(self): + """Test that ValueError is translated to 400 HTTP status.""" + exc = ValueError("Invalid input") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 400 + assert result.detail == "Invalid value: Invalid input" + + def test_translate_bad_request_error(self): + """Test that BadRequestError is translated to 400 HTTP status.""" + # Create a mock response for BadRequestError + mock_response = Mock() + mock_response.status_code = 400 + mock_response.headers = {} + + exc = BadRequestError("Bad request", response=mock_response, body="Bad request") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 400 + assert result.detail == "Bad request" + + def test_translate_authentication_required_error(self): + """Test that AuthenticationRequiredError is translated to 401 HTTP status.""" + exc = AuthenticationRequiredError("Authentication required") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 401 + assert result.detail == "Authentication required: Authentication required" + + def test_translate_timeout_error(self): + """Test that TimeoutError is translated to 504 HTTP status.""" + exc = TimeoutError("Operation timed out") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 504 + assert result.detail == "Operation timed out: Operation timed out" + + def test_translate_asyncio_timeout_error(self): + """Test that asyncio.TimeoutError is translated to 504 HTTP status.""" + exc = TimeoutError() + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 504 + assert result.detail == "Operation timed out: " + + def test_translate_not_implemented_error(self): + """Test that NotImplementedError is translated to 501 HTTP status.""" + exc = NotImplementedError("Not implemented") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 501 + assert result.detail == "Not implemented: Not implemented" + + def test_translate_validation_error(self): + """Test that ValidationError is translated to 400 HTTP status with proper format.""" + # Create a mock validation error using proper Pydantic error format + exc = ValidationError.from_exception_data( + "TestModel", + [ + { + "loc": ("field", "nested"), + "msg": "field required", + "type": "missing", + } + ], + ) + + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 400 + assert "errors" in result.detail + assert len(result.detail["errors"]) == 1 + assert result.detail["errors"][0]["loc"] == ["field", "nested"] + assert result.detail["errors"][0]["msg"] == "Field required" + assert result.detail["errors"][0]["type"] == "missing" + + def test_translate_generic_exception(self): + """Test that generic exceptions are translated to 500 HTTP status.""" + exc = Exception("Unexpected error") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 500 + assert result.detail == "Internal server error: An unexpected error occurred." 
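Before the remaining cases, it may help to see the shape of dispatch these assertions imply. This is a hedged sketch only; the real `translate_exception` lives in `llama_stack/distribution/server/server.py` and is the source of truth:

```python
# Simplified sketch of the mapping exercised by the tests in this file;
# the detail strings are copied from the assertions, the structure is assumed.
from fastapi import HTTPException


def translate_exception_sketch(exc: Exception) -> HTTPException:
    if isinstance(exc, PermissionError):
        return HTTPException(status_code=403, detail=f"Permission denied: {exc}")
    if isinstance(exc, TimeoutError):
        # asyncio.TimeoutError has been an alias of TimeoutError since Python 3.11,
        # which is why the asyncio test above can construct the builtin directly.
        return HTTPException(status_code=504, detail=f"Operation timed out: {exc}")
    if isinstance(exc, NotImplementedError):
        return HTTPException(status_code=501, detail=f"Not implemented: {exc}")
    if isinstance(exc, ValueError):
        return HTTPException(status_code=400, detail=f"Invalid value: {exc}")
    return HTTPException(status_code=500, detail="Internal server error: An unexpected error occurred.")
```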
+ + def test_translate_runtime_error(self): + """Test that RuntimeError is translated to 500 HTTP status.""" + exc = RuntimeError("Runtime error") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 500 + assert result.detail == "Internal server error: An unexpected error occurred." + + def test_multiple_access_denied_scenarios(self): + """Test various scenarios that should result in 403 status codes.""" + # Test AccessDeniedError (uses enhanced message) + exc1 = AccessDeniedError() + result1 = translate_exception(exc1) + assert isinstance(result1, HTTPException) + assert result1.status_code == 403 + assert result1.detail == "Permission denied: Insufficient permissions" + + # Test PermissionError (uses generic message) + exc2 = PermissionError("No permission") + result2 = translate_exception(exc2) + assert isinstance(result2, HTTPException) + assert result2.status_code == 403 + assert result2.detail == "Permission denied: No permission" + + exc3 = PermissionError("Access denied") + result3 = translate_exception(exc3) + assert isinstance(result3, HTTPException) + assert result3.status_code == 403 + assert result3.detail == "Permission denied: Access denied"
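Finally, a speculative note on how a translated exception typically reaches clients: FastAPI lets a catch-all handler funnel uncaught errors through `translate_exception`. The wiring below is an assumption for illustration; llama-stack's actual server setup may register handlers differently:

```python
# Hypothetical wiring; handler registration details are assumptions.
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse

from llama_stack.distribution.server.server import translate_exception

app = FastAPI()


@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    # Map any uncaught exception onto the HTTPException contract the tests pin down.
    http_exc = translate_exception(exc)
    return JSONResponse(
        status_code=http_exc.status_code,
        content={"error": {"detail": http_exc.detail}},
    )
```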