diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 32e221128..0dc7a9889 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -25,7 +25,7 @@ jobs: # Listing tests manually since some of them currently fail # TODO: generate matrix list from tests/integration when fixed test-type: [agents, inference, datasets, inspect, scoring, post_training, providers, tool_runtime, vector_io] - client-type: [library, http] + client-type: [library, server] python-version: ["3.12", "3.13"] fail-fast: false # we want to run all tests regardless of failure @@ -45,39 +45,6 @@ jobs: run: | uv run llama stack build --template ollama --image-type venv - - name: Start Llama Stack server in background - if: matrix.client-type == 'http' - env: - INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" - run: | - LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv --env OLLAMA_URL="http://0.0.0.0:11434" & - - - name: Wait for Llama Stack server to be ready - if: matrix.client-type == 'http' - run: | - echo "Waiting for Llama Stack server..." - for i in {1..30}; do - if curl -s http://localhost:8321/v1/health | grep -q "OK"; then - echo "Llama Stack server is up!" - exit 0 - fi - sleep 1 - done - echo "Llama Stack server failed to start" - cat server.log - exit 1 - - - name: Verify Ollama status is OK - if: matrix.client-type == 'http' - run: | - echo "Verifying Ollama status..." - ollama_status=$(curl -s -L http://127.0.0.1:8321/v1/providers/ollama|jq --raw-output .health.status) - echo "Ollama status: $ollama_status" - if [ "$ollama_status" != "OK" ]; then - echo "Ollama health check failed" - exit 1 - fi - - name: Check Storage and Memory Available Before Tests if: ${{ always() }} run: | @@ -92,12 +59,14 @@ jobs: if [ "${{ matrix.client-type }}" == "library" ]; then stack_config="ollama" else - stack_config="http://localhost:8321" + stack_config="server:ollama" fi uv run pytest -s -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \ -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \ --text-model="meta-llama/Llama-3.2-3B-Instruct" \ - --embedding-model=all-MiniLM-L6-v2 + --embedding-model=all-MiniLM-L6-v2 \ + --color=yes \ + --capture=tee-sys | tee pytest-${{ matrix.test-type }}.log - name: Check Storage and Memory Available After Tests if: ${{ always() }} diff --git a/README.md b/README.md index 7f34c3340..3b5358ec2 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,8 @@ pip install llama-stack-client ### CLI ```bash # Run a chat completion +MODEL="Llama-4-Scout-17B-16E-Instruct" + llama-stack-client --endpoint http://localhost:8321 \ inference chat-completion \ --model-id meta-llama/$MODEL \ @@ -106,46 +108,59 @@ By reducing friction and complexity, Llama Stack empowers developers to focus on ### API Providers Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack. 
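The provider health check that the removed CI steps performed with curl and jq can also be reproduced from Python when debugging a stack. A minimal sketch, assuming a server on localhost:8321 with an Ollama provider registered under the id `ollama`, as in the workflow above:

```python
import requests

# Mirrors the removed CI step: GET /v1/providers/ollama and read .health.status
resp = requests.get("http://localhost:8321/v1/providers/ollama", timeout=5)
resp.raise_for_status()
status = resp.json()["health"]["status"]
print(f"Ollama provider status: {status}")  # the CI gate required "OK"
```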
+Please check out the [full list](https://llama-stack.readthedocs.io/en/latest/providers/index.html). -| **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** | **Post Training** | -|:------------------------:|:----------------------:|:----------:|:-------------:|:----------:|:----------:|:-------------:|:-----------------:| -| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | | -| SambaNova | Hosted | | ✅ | | ✅ | | | -| Cerebras | Hosted | | ✅ | | | | | -| Fireworks | Hosted | ✅ | ✅ | ✅ | | | | -| AWS Bedrock | Hosted | | ✅ | | ✅ | | | -| Together | Hosted | ✅ | ✅ | | ✅ | | | -| Groq | Hosted | | ✅ | | | | | -| Ollama | Single Node | | ✅ | | | | | -| TGI | Hosted and Single Node | | ✅ | | | | | -| NVIDIA NIM | Hosted and Single Node | | ✅ | | | | | -| Chroma | Single Node | | | ✅ | | | | -| PG Vector | Single Node | | | ✅ | | | | -| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | -| vLLM | Hosted and Single Node | | ✅ | | | | | -| OpenAI | Hosted | | ✅ | | | | | -| Anthropic | Hosted | | ✅ | | | | | -| Gemini | Hosted | | ✅ | | | | | -| watsonx | Hosted | | ✅ | | | | | -| HuggingFace | Single Node | | | | | | ✅ | -| TorchTune | Single Node | | | | | | ✅ | -| NVIDIA NEMO | Hosted | | | | | | ✅ | +| API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Telemetry | Post Training | Eval | DatasetIO | +|:-------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:--------:| +| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| SambaNova | Hosted | | ✅ | | ✅ | | | | | +| Cerebras | Hosted | | ✅ | | | | | | | +| Fireworks | Hosted | ✅ | ✅ | ✅ | | | | | | +| AWS Bedrock | Hosted | | ✅ | | ✅ | | | | | +| Together | Hosted | ✅ | ✅ | | ✅ | | | | | +| Groq | Hosted | | ✅ | | | | | | | +| Ollama | Single Node | | ✅ | | | | | | | +| TGI | Hosted/Single Node | | ✅ | | | | | | | +| NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | | | +| ChromaDB | Hosted/Single Node | | | ✅ | | | | | | +| PG Vector | Single Node | | | ✅ | | | | | | +| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | | | +| vLLM | Single Node | | ✅ | | | | | | | +| OpenAI | Hosted | | ✅ | | | | | | | +| Anthropic | Hosted | | ✅ | | | | | | | +| Gemini | Hosted | | ✅ | | | | | | | +| WatsonX | Hosted | | ✅ | | | | | | | +| HuggingFace | Single Node | | | | | | ✅ | | ✅ | +| TorchTune | Single Node | | | | | | ✅ | | | +| NVIDIA NEMO | Hosted | | ✅ | ✅ | | | ✅ | ✅ | ✅ | +| NVIDIA | Hosted | | | | | | ✅ | ✅ | ✅ | +> **Note**: Additional providers are available through external packages. See [External Providers](https://llama-stack.readthedocs.io/en/latest/providers/external.html) documentation. ### Distributions -A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (eg. ollama) and seamlessly transition to production (eg. Fireworks) without changing your application code. Here are some of the distributions we support: +A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (e.g. ollama) and seamlessly transition to production (e.g. Fireworks) without changing your application code.
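The portability claim above comes down to the client being configured purely by endpoint; the application code itself never references a provider. A minimal sketch, using the client exactly as the quickstart notebook later in this diff does (the `LLAMA_STACK_ENDPOINT` variable name is illustrative, not an official convention):

```python
import os

from llama_stack_client import LlamaStackClient

# Local Ollama-backed stack in development, hosted distribution in
# production: only the endpoint changes, not the calls that follow.
client = LlamaStackClient(
    base_url=os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:8321"),
)

# The same call works against either deployment.
print([m.identifier for m in client.models.list()])
```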
+Here are some of the distributions we support: | **Distribution** | **Llama Stack Docker** | Start This Distribution | |:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:| | Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-gpu.html) | -| SambaNova | [llamastack/distribution-sambanova](https://hub.docker.com/repository/docker/llamastack/distribution-sambanova/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/sambanova.html) | -| Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) | +| TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/tgi.html) | +| vLLM | [llamastack/distribution-remote-vllm](https://hub.docker.com/repository/docker/llamastack/distribution-remote-vllm/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/remote-vllm.html) | +| Starter | [llamastack/distribution-starter](https://hub.docker.com/repository/docker/llamastack/distribution-starter/general) | | +| PostgreSQL | [llamastack/distribution-postgres-demo](https://hub.docker.com/repository/docker/llamastack/distribution-postgres-demo/general) | | + + +Here are the ones out of support scope but still available from Dockerhub: + +| **Distribution** | **Llama Stack Docker** | Start This Distribution | +|:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:| | Ollama | [llamastack/distribution-ollama](https://hub.docker.com/repository/docker/llamastack/distribution-ollama/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/ollama.html) | -| TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/tgi.html) | | Together | [llamastack/distribution-together](https://hub.docker.com/repository/docker/llamastack/distribution-together/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/together.html) | | Fireworks | [llamastack/distribution-fireworks](https://hub.docker.com/repository/docker/llamastack/distribution-fireworks/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/fireworks.html) | -| vLLM | [llamastack/distribution-remote-vllm](https://hub.docker.com/repository/docker/llamastack/distribution-remote-vllm/general) |
[Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/remote-vllm.html) | +| AWS Bedrock | [llamastack/distribution-bedrock](https://hub.docker.com/repository/docker/llamastack/distribution-bedrock/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/bedrock.html) | +| SambaNova | [llamastack/distribution-sambanova](https://hub.docker.com/repository/docker/llamastack/distribution-sambanova/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/sambanova.html) | +| Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) | | | | ### Documentation diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb index cdaf074b8..88878c9be 100644 --- a/docs/getting_started.ipynb +++ b/docs/getting_started.ipynb @@ -17,7 +17,9 @@ "\n", "Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n", "\n", - "In this guide, we will showcase how you can build LLM-powered agentic applications using Llama Stack.\n" + "In this guide, we will showcase how you can build LLM-powered agentic applications using Llama Stack.\n", + "\n", + "**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. It provides a streamlined experience for getting up and running in just a few steps.\n" ] }, { diff --git a/docs/getting_started_llama4.ipynb b/docs/getting_started_llama4.ipynb index d489b5d06..edefda28c 100644 --- a/docs/getting_started_llama4.ipynb +++ b/docs/getting_started_llama4.ipynb @@ -17,7 +17,9 @@ "\n", "Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n", "\n", - "In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n" + "In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n", + "\n", + "**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. It provides a streamlined experience for getting up and running in just a few steps.\n" ] }, { diff --git a/docs/getting_started_llama_api.ipynb b/docs/getting_started_llama_api.ipynb index 128e9114a..e6c74986b 100644 --- a/docs/getting_started_llama_api.ipynb +++ b/docs/getting_started_llama_api.ipynb @@ -17,7 +17,9 @@ "\n", "Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n", "\n", - "In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n" + "In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n", + "\n", + "**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. 
It provides a streamlined experience for getting up and running in just a few steps.\n" ] }, { diff --git a/docs/quick_start.ipynb b/docs/quick_start.ipynb new file mode 100644 index 000000000..4ae1dbe8d --- /dev/null +++ b/docs/quick_start.ipynb @@ -0,0 +1,367 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c1e7571c", + "metadata": { + "id": "c1e7571c" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)\n", + "\n", + "# Llama Stack - Building AI Applications\n", + "\n", + "\"drawing\"\n", + "\n", + "Get started with Llama Stack in minutes!\n", + "\n", + "[Llama Stack](https://github.com/meta-llama/llama-stack) is a stateful service with REST APIs to support the seamless transition of AI applications across different environments. You can build and test using a local server first and deploy to a hosted endpoint for production.\n", + "\n", + "In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/)\n", + "as the inference [provider](docs/source/providers/index.md#inference) for a Llama Model.\n" + ] + }, + { + "cell_type": "markdown", + "id": "4CV1Q19BDMVw", + "metadata": { + "id": "4CV1Q19BDMVw" + }, + "source": [ + "## Step 1: Install and setup" + ] + }, + { + "cell_type": "markdown", + "id": "K4AvfUAJZOeS", + "metadata": { + "id": "K4AvfUAJZOeS" + }, + "source": [ + "### 1.1. Install uv and test inference with Ollama\n", + "\n", + "We'll install [uv](https://docs.astral.sh/uv/) to set up the Python virtual environment, along with [colab-xterm](https://github.com/InfuseAI/colab-xterm) for running command-line tools, and [Ollama](https://ollama.com/download) as the inference provider." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a2d7b85", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install uv llama_stack llama-stack-client\n", + "\n", + "## If running on Colab:\n", + "# !pip install colab-xterm\n", + "# %load_ext colabxterm\n", + "\n", + "!curl https://ollama.ai/install.sh | sh" ] }, { "cell_type": "markdown", "id": "39fa584b", "metadata": {}, "source": [ "### 1.2. Test inference with Ollama" ] }, { "cell_type": "markdown", "id": "3bf81522", "metadata": {}, "source": [ "We’ll now launch a terminal and run inference on a Llama model with Ollama to verify that the model is working correctly." ] }, { "cell_type": "code", "execution_count": null, "id": "a7e8e0f1", "metadata": {}, "outputs": [], "source": [ "## If running on Colab:\n", "# %xterm\n", "\n", "## To be run in the terminal:\n", "# ollama serve &\n", "# ollama run llama3.2:3b --keepalive 60m" ] }, { "cell_type": "markdown", "id": "f3c5f243", "metadata": {}, "source": [ "If successful, you should see the model respond to a prompt.\n", "\n", "...\n", "```\n", ">>> hi\n", "Hello! How can I assist you today?\n", "```" ] }, { "cell_type": "markdown", "id": "oDUB7M_qe-Gs", "metadata": { "id": "oDUB7M_qe-Gs" }, "source": [ "## Step 2: Run the Llama Stack server\n", "\n", "In this showcase, we will start a Llama Stack server running locally." ] }, { "cell_type": "markdown", "id": "732eadc6", "metadata": {}, "source": [ "### 2.1.
Set up the Llama Stack Server" ] }, { "cell_type": "code", "execution_count": null, "id": "J2kGed0R5PSf", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "collapsed": true, "id": "J2kGed0R5PSf", "outputId": "2478ea60-8d35-48a1-b011-f233831740c5" }, "outputs": [], "source": [ "import os\n", "import subprocess\n", "\n", "if \"UV_SYSTEM_PYTHON\" in os.environ:\n", " del os.environ[\"UV_SYSTEM_PYTHON\"]\n", "\n", "# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n", "!uv run --with llama-stack llama stack build --template ollama --image-type venv --image-name myvenv\n", "\n", "def run_llama_stack_server_background():\n", " log_file = open(\"llama_stack_server.log\", \"w\")\n", " process = subprocess.Popen(\n", " \"uv run --with llama-stack llama stack run ollama --image-type venv --image-name myvenv --env INFERENCE_MODEL=llama3.2:3b\",\n", " shell=True,\n", " stdout=log_file,\n", " stderr=log_file,\n", " text=True\n", " )\n", " \n", " print(f\"Starting Llama Stack server with PID: {process.pid}\")\n", " return process\n", "\n", "def wait_for_server_to_start():\n", " import requests\n", " from requests.exceptions import ConnectionError\n", " import time\n", " \n", " url = \"http://0.0.0.0:8321/v1/health\"\n", " max_retries = 30\n", " retry_interval = 1\n", " \n", " print(\"Waiting for server to start\", end=\"\")\n", " for _ in range(max_retries):\n", " try:\n", " response = requests.get(url)\n", " if response.status_code == 200:\n", " print(\"\\nServer is ready!\")\n", " return True\n", " except ConnectionError:\n", " print(\".\", end=\"\", flush=True)\n", " time.sleep(retry_interval)\n", " \n", " print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n", " return False\n", "\n", "\n", "# use this helper if needed to kill the server\n", "def kill_llama_stack_server():\n", " # Kill any existing llama stack server processes\n", " os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n" ] }, { "cell_type": "markdown", "id": "c40e9efd", "metadata": {}, "source": [ "### 2.2.
Start the Llama Stack Server" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f779283d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting Llama Stack server with PID: 787100\n", + "Waiting for server to start\n", + "Server is ready!\n" + ] + } + ], + "source": [ + "server_process = run_llama_stack_server_background()\n", + "assert wait_for_server_to_start()" + ] + }, + { + "cell_type": "markdown", + "id": "28477c03", + "metadata": {}, + "source": [ + "## Step 3: Run the demo" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7da71011", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "rag_tool> Ingesting document: https://www.paulgraham.com/greatwork.html\n", + "prompt> How do you do great work?\n", + "\u001b[33minference> \u001b[0m\u001b[33m[k\u001b[0m\u001b[33mnowledge\u001b[0m\u001b[33m_search\u001b[0m\u001b[33m(query\u001b[0m\u001b[33m=\"\u001b[0m\u001b[33mWhat\u001b[0m\u001b[33m is\u001b[0m\u001b[33m the\u001b[0m\u001b[33m key\u001b[0m\u001b[33m to\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m\")]\u001b[0m\u001b[97m\u001b[0m\n", + "\u001b[32mtool_execution> Tool:knowledge_search Args:{'query': 'What is the key to doing great work'}\u001b[0m\n", + "\u001b[32mtool_execution> Tool:knowledge_search Response:[TextContentItem(text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n', type='text'), TextContentItem(text=\"Result 1:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 2:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 3:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 4:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 5:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text='END of knowledge_search tool results.\\n', type='text'), TextContentItem(text='The above results were retrieved to help answer the user\\'s query: \"What is the key to doing great work\". 
Use them as supporting information only in answering this query.\\n', type='text')]\u001b[0m\n", + "\u001b[33minference> \u001b[0m\u001b[33mDoing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m means\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m something\u001b[0m\u001b[33m important\u001b[0m\u001b[33m so\u001b[0m\u001b[33m well\u001b[0m\u001b[33m that\u001b[0m\u001b[33m you\u001b[0m\u001b[33m expand\u001b[0m\u001b[33m people\u001b[0m\u001b[33m's\u001b[0m\u001b[33m ideas\u001b[0m\u001b[33m of\u001b[0m\u001b[33m what\u001b[0m\u001b[33m's\u001b[0m\u001b[33m possible\u001b[0m\u001b[33m.\u001b[0m\u001b[33m However\u001b[0m\u001b[33m,\u001b[0m\u001b[33m there\u001b[0m\u001b[33m's\u001b[0m\u001b[33m no\u001b[0m\u001b[33m threshold\u001b[0m\u001b[33m for\u001b[0m\u001b[33m importance\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m's\u001b[0m\u001b[33m often\u001b[0m\u001b[33m hard\u001b[0m\u001b[33m to\u001b[0m\u001b[33m judge\u001b[0m\u001b[33m at\u001b[0m\u001b[33m the\u001b[0m\u001b[33m time\u001b[0m\u001b[33m anyway\u001b[0m\u001b[33m.\u001b[0m\u001b[33m Great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m matter\u001b[0m\u001b[33m of\u001b[0m\u001b[33m degree\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m can\u001b[0m\u001b[33m be\u001b[0m\u001b[33m difficult\u001b[0m\u001b[33m to\u001b[0m\u001b[33m determine\u001b[0m\u001b[33m whether\u001b[0m\u001b[33m someone\u001b[0m\u001b[33m has\u001b[0m\u001b[33m done\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m until\u001b[0m\u001b[33m after\u001b[0m\u001b[33m the\u001b[0m\u001b[33m fact\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n", + "\u001b[30m\u001b[0m" + ] + } + ], + "source": [ + "from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient\n", + "\n", + "vector_db_id = \"my_demo_vector_db\"\n", + "client = LlamaStackClient(base_url=\"http://0.0.0.0:8321\")\n", + "\n", + "models = client.models.list()\n", + "\n", + "# Select the first LLM and first embedding models\n", + "model_id = next(m for m in models if m.model_type == \"llm\").identifier\n", + "embedding_model_id = (\n", + " em := next(m for m in models if m.model_type == \"embedding\")\n", + ").identifier\n", + "embedding_dimension = em.metadata[\"embedding_dimension\"]\n", + "\n", + "_ = client.vector_dbs.register(\n", + " vector_db_id=vector_db_id,\n", + " embedding_model=embedding_model_id,\n", + " embedding_dimension=embedding_dimension,\n", + " provider_id=\"faiss\",\n", + ")\n", + "source = \"https://www.paulgraham.com/greatwork.html\"\n", + "print(\"rag_tool> Ingesting document:\", source)\n", + "document = RAGDocument(\n", + " document_id=\"document_1\",\n", + " content=source,\n", + " mime_type=\"text/html\",\n", + " metadata={},\n", + ")\n", + "client.tool_runtime.rag_tool.insert(\n", + " documents=[document],\n", + " vector_db_id=vector_db_id,\n", + " chunk_size_in_tokens=50,\n", + ")\n", + "agent = Agent(\n", + " client,\n", + " model=model_id,\n", + " instructions=\"You are a helpful assistant\",\n", + " tools=[\n", + " {\n", + " \"name\": \"builtin::rag/knowledge_search\",\n", + " \"args\": {\"vector_db_ids\": [vector_db_id]},\n", + " }\n", + " ],\n", + ")\n", + "\n", + "prompt = \"How do you do great work?\"\n", + "print(\"prompt>\", prompt)\n", + "\n", + "response = agent.create_turn(\n", + " messages=[{\"role\": \"user\", \"content\": prompt}],\n", + " 
session_id=agent.create_session(\"rag_session\"),\n", + " stream=True,\n", + ")\n", + "\n", + "for log in AgentEventLogger().log(response):\n", + " log.print()" + ] + }, + { + "cell_type": "markdown", + "id": "341aaadf", + "metadata": {}, + "source": [ + "Congratulations! You've successfully built your first RAG application using Llama Stack! 🎉🥳" + ] + }, + { + "cell_type": "markdown", + "id": "e88e1185", + "metadata": {}, + "source": [ + "## Next Steps" + ] + }, + { + "cell_type": "markdown", + "id": "bcb73600", + "metadata": {}, + "source": [ + "Now you're ready to dive deeper into Llama Stack!\n", + "- Explore the [Detailed Tutorial](./detailed_tutorial.md).\n", + "- Try the [Getting Started Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb).\n", + "- Browse more [Notebooks on GitHub](https://github.com/meta-llama/llama-stack/tree/main/docs/notebooks).\n", + "- Learn about Llama Stack [Concepts](../concepts/index.md).\n", + "- Discover how to [Build Llama Stacks](../distributions/index.md).\n", + "- Refer to our [References](../references/index.md) for details on the Llama CLI and Python SDK.\n", + "- Check out the [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repository for example applications and tutorials." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md index 0a0ce994f..1bba6677e 100644 --- a/docs/source/distributions/configuration.md +++ b/docs/source/distributions/configuration.md @@ -77,10 +77,10 @@ Next up is the most critical part: the set of providers that the stack will use ```yaml providers: inference: - # provider_id is a string you can choose freely + # provider_id is a string you can choose freely - provider_id: ollama # provider_type is a string that specifies the type of provider. - # in this case, the provider for inference is ollama and it is run remotely (outside of the distribution) + # in this case, the provider for inference is ollama and it runs remotely (outside of the distribution) provider_type: remote::ollama # config is a dictionary that contains the configuration for the provider. # in this case, the configuration is the url of the ollama server @@ -88,7 +88,7 @@ providers: url: ${env.OLLAMA_URL:=http://localhost:11434} ``` A few things to note: -- A _provider instance_ is identified with an (id, type, configuration) triplet. +- A _provider instance_ is identified with an (id, type, config) triplet. - The id is a string you can choose freely. - You can instantiate any number of provider instances of the same type. - The configuration dictionary is provider-specific. 
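The `${env.VAR:=default}` placeholders used in the config above resolve to the environment variable when it is set, and to the default otherwise. A rough sketch of those semantics (the real implementation is `replace_env_vars` in `llama_stack.distribution.stack`, which `configure.py` starts calling later in this diff; the regex version here is only illustrative):

```python
import os
import re

_ENV_VAR = re.compile(r"\$\{env\.(?P<name>\w+)(?::=(?P<default>[^}]*))?\}")

def substitute_env(value: str) -> str:
    """Resolve ${env.VAR:=default} placeholders (illustrative only)."""
    def _resolve(match: re.Match) -> str:
        name, default = match.group("name"), match.group("default")
        if name in os.environ:
            return os.environ[name]
        if default is not None:
            return default
        raise ValueError(f"environment variable {name} is not set")
    return _ENV_VAR.sub(_resolve, value)

# falls back to the default when OLLAMA_URL is unset
print(substitute_env("${env.OLLAMA_URL:=http://localhost:11434}"))
```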
@@ -187,7 +187,7 @@ The environment variable substitution system is type-safe: ## Resources -Finally, let's look at the `models` section: +Let's look at the `models` section: ```yaml models: @@ -195,8 +195,9 @@ models: model_id: ${env.INFERENCE_MODEL} provider_id: ollama provider_model_id: null + model_type: llm ``` -A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage the clients to always register models before using them, some Stack servers may come up a list of "already known and available" models. +A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage the clients to register models before using them, some Stack servers may come up with a list of "already known and available" models. What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`. diff --git a/docs/source/getting_started/index.md b/docs/source/getting_started/index.md index 8382758cc..ea45da1f7 100644 --- a/docs/source/getting_started/index.md +++ b/docs/source/getting_started/index.md @@ -8,6 +8,8 @@ environments. You can build and test using a local server first and deploy to a In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/) as the inference [provider](../providers/inference/index) for a Llama Model. +**💡 Notebook Version:** You can also follow this quickstart guide in a Jupyter notebook format: [quick_start.ipynb](https://github.com/meta-llama/llama-stack/blob/main/docs/quick_start.ipynb) + #### Step 1: Install and setup 1. Install [uv](https://docs.astral.sh/uv/) 2.
Run inference on a Llama model with [Ollama](https://ollama.com/download) diff --git a/docs/source/index.md b/docs/source/index.md index 1df5e8507..755b228e3 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -73,17 +73,26 @@ A number of "adapters" are available for some popular Inference and Vector Store | OpenAI | Hosted | | Anthropic | Hosted | | Gemini | Hosted | +| WatsonX | Hosted | +**Agents API** +| **Provider** | **Environments** | +| :----: | :----: | +| Meta Reference | Single Node | +| Fireworks | Hosted | +| Together | Hosted | +| PyTorch ExecuTorch | On-device iOS | **Vector IO API** | **Provider** | **Environments** | | :----: | :----: | | FAISS | Single Node | -| SQLite-Vec| Single Node | +| SQLite-Vec | Single Node | | Chroma | Hosted and Single Node | | Milvus | Hosted and Single Node | | Postgres (PGVector) | Hosted and Single Node | | Weaviate | Hosted | +| Qdrant | Hosted and Single Node | **Safety API** | **Provider** | **Environments** | @@ -93,6 +102,30 @@ A number of "adapters" are available for some popular Inference and Vector Store | Code Scanner | Single Node | | AWS Bedrock | Hosted | +**Post Training API** +| **Provider** | **Environments** | +| :----: | :----: | +| Meta Reference | Single Node | +| HuggingFace | Single Node | +| TorchTune | Single Node | +| NVIDIA NEMO | Hosted | + +**Eval API** +| **Provider** | **Environments** | +| :----: | :----: | +| Meta Reference | Single Node | +| NVIDIA NEMO | Hosted | + +**Telemetry API** +| **Provider** | **Environments** | +| :----: | :----: | +| Meta Reference | Single Node | + +**Tool Runtime API** +| **Provider** | **Environments** | +| :----: | :----: | +| Brave Search | Hosted | +| RAG Runtime | Single Node | ```{toctree} :hidden: diff --git a/docs/source/providers/agents/inline_meta-reference.md b/docs/source/providers/agents/inline_meta-reference.md index cfc0c6881..5f64f79e1 100644 --- a/docs/source/providers/agents/inline_meta-reference.md +++ b/docs/source/providers/agents/inline_meta-reference.md @@ -16,7 +16,6 @@ Meta's reference implementation of an agent system that can use tools, access ve ```yaml persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db responses_store: type: sqlite diff --git a/docs/source/providers/datasetio/inline_localfs.md b/docs/source/providers/datasetio/inline_localfs.md index fbe4c40e3..87a0c795c 100644 --- a/docs/source/providers/datasetio/inline_localfs.md +++ b/docs/source/providers/datasetio/inline_localfs.md @@ -15,7 +15,6 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to ```yaml kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db ``` diff --git a/docs/source/providers/datasetio/remote_huggingface.md b/docs/source/providers/datasetio/remote_huggingface.md index e2052602e..3711f7396 100644 --- a/docs/source/providers/datasetio/remote_huggingface.md +++ b/docs/source/providers/datasetio/remote_huggingface.md @@ -15,7 +15,6 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi ```yaml kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db ``` diff --git a/docs/source/providers/eval/inline_meta-reference.md b/docs/source/providers/eval/inline_meta-reference.md index 704741b5a..606883c72 100644 --- a/docs/source/providers/eval/inline_meta-reference.md +++ b/docs/source/providers/eval/inline_meta-reference.md @@ -15,7 +15,6 @@ 
Meta's reference implementation of evaluation tasks with support for multiple la ```yaml kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db ``` diff --git a/docs/source/providers/index.md b/docs/source/providers/index.md index 3ea253685..f804582d7 100644 --- a/docs/source/providers/index.md +++ b/docs/source/providers/index.md @@ -1,9 +1,10 @@ # Providers Overview The goal of Llama Stack is to build an ecosystem where users can easily swap out different implementations for the same API. Examples for these include: -- LLM inference providers (e.g., Ollama, Fireworks, Together, AWS Bedrock, Groq, Cerebras, SambaNova, vLLM, etc.), -- Vector databases (e.g., ChromaDB, Weaviate, Qdrant, Milvus, FAISS, PGVector, SQLite-Vec, etc.), -- Safety providers (e.g., Meta's Llama Guard, AWS Bedrock Guardrails, etc.) +- LLM inference providers (e.g., Meta Reference, Ollama, Fireworks, Together, AWS Bedrock, Groq, Cerebras, SambaNova, vLLM, OpenAI, Anthropic, Gemini, WatsonX, etc.), +- Vector databases (e.g., FAISS, SQLite-Vec, ChromaDB, Weaviate, Qdrant, Milvus, PGVector, etc.), +- Safety providers (e.g., Meta's Llama Guard, Prompt Guard, Code Scanner, AWS Bedrock Guardrails, etc.), +- Tool Runtime providers (e.g., RAG Runtime, Brave Search, etc.) Providers come in two flavors: - **Remote**: the provider runs as a separate service external to the Llama Stack codebase. Llama Stack contains a small amount of adapter code. diff --git a/docs/source/providers/vector_io/inline_faiss.md b/docs/source/providers/vector_io/inline_faiss.md index 2dcf4625b..bcff66f3f 100644 --- a/docs/source/providers/vector_io/inline_faiss.md +++ b/docs/source/providers/vector_io/inline_faiss.md @@ -44,7 +44,6 @@ more details about Faiss in general. ```yaml kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db ``` diff --git a/docs/source/providers/vector_io/inline_meta-reference.md b/docs/source/providers/vector_io/inline_meta-reference.md index c9ca12ff2..0aac445bd 100644 --- a/docs/source/providers/vector_io/inline_meta-reference.md +++ b/docs/source/providers/vector_io/inline_meta-reference.md @@ -15,7 +15,6 @@ Meta's reference implementation of a vector database. ```yaml kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db ``` diff --git a/docs/source/providers/vector_io/inline_milvus.md b/docs/source/providers/vector_io/inline_milvus.md index 8e99d7f95..65c67f3ee 100644 --- a/docs/source/providers/vector_io/inline_milvus.md +++ b/docs/source/providers/vector_io/inline_milvus.md @@ -19,7 +19,6 @@ Please refer to the remote provider documentation. 
db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy}/milvus.db kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_registry.db ``` diff --git a/docs/zero_to_hero_guide/07_Agents101.ipynb b/docs/zero_to_hero_guide/07_Agents101.ipynb index b6df2a4c8..905799946 100644 --- a/docs/zero_to_hero_guide/07_Agents101.ipynb +++ b/docs/zero_to_hero_guide/07_Agents101.ipynb @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -65,7 +65,7 @@ "from dotenv import load_dotenv\n", "\n", "load_dotenv()\n", - "BRAVE_SEARCH_API_KEY = os.environ[\"BRAVE_SEARCH_API_KEY\"]\n" + "TAVILY_SEARCH_API_KEY = os.environ[\"TAVILY_SEARCH_API_KEY\"]\n" ] }, { @@ -110,10 +110,17 @@ "from llama_stack_client import LlamaStackClient\n", "from llama_stack_client.lib.agents.agent import Agent\n", "from llama_stack_client.lib.agents.event_logger import EventLogger\n", + "from llama_stack_client.types import UserMessage\n", + "from typing import cast, Iterator\n", "\n", "\n", "async def agent_example():\n", - " client = LlamaStackClient(base_url=f\"http://{HOST}:{PORT}\")\n", + " client = LlamaStackClient(\n", + " base_url=f\"http://{HOST}:{PORT}\",\n", + " provider_data={\n", + " \"tavily_search_api_key\": TAVILY_SEARCH_API_KEY,\n", + " }\n", + " )\n", " agent = Agent(\n", " client,\n", " model=MODEL_NAME,\n", @@ -123,13 +130,7 @@ " \"type\": \"greedy\",\n", " },\n", " },\n", - " tools=[\n", - " {\n", - " \"type\": \"brave_search\",\n", - " \"engine\": \"brave\",\n", - " \"api_key\": BRAVE_SEARCH_API_KEY,\n", - " }\n", - " ],\n", + " tools=[\"builtin::websearch\"],\n", " )\n", " session_id = agent.create_session(\"test-session\")\n", " print(f\"Created session_id={session_id} for Agent({agent.agent_id})\")\n", @@ -142,15 +143,13 @@ " for prompt in user_prompts:\n", " response = agent.create_turn(\n", " messages=[\n", - " {\n", - " \"role\": \"user\",\n", - " \"content\": prompt,\n", - " }\n", + " UserMessage(role=\"user\", content=prompt)\n", " ],\n", " session_id=session_id,\n", + " stream=True,\n", " )\n", "\n", - " async for log in EventLogger().log(response):\n", + " for log in EventLogger().log(cast(Iterator, response)):\n", " log.print()\n", "\n", "\n", diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py index 0eb53f397..d621e601e 100644 --- a/llama_stack/apis/telemetry/telemetry.py +++ b/llama_stack/apis/telemetry/telemetry.py @@ -101,7 +101,7 @@ class MetricInResponse(BaseModel): # This is a short term solution to allow inference API to return metrics # The ideal way to do this is to have a way for all response types to include metrics -# and all metric events logged to the telemetry API to be inlcuded with the response +# and all metric events logged to the telemetry API to be included with the response # To do this, we will need to augment all response types with a metrics field. # We have hit a blocker from stainless SDK that prevents us from doing this. 
# The blocker is that if we were to augment the response types that have a data field diff --git a/llama_stack/distribution/access_control/access_control.py b/llama_stack/distribution/access_control/access_control.py index 84d506d8f..075152ce4 100644 --- a/llama_stack/distribution/access_control/access_control.py +++ b/llama_stack/distribution/access_control/access_control.py @@ -106,4 +106,26 @@ def is_action_allowed( class AccessDeniedError(RuntimeError): - pass + def __init__(self, action: str | None = None, resource: ProtectedResource | None = None, user: User | None = None): + self.action = action + self.resource = resource + self.user = user + + message = _build_access_denied_message(action, resource, user) + super().__init__(message) + + +def _build_access_denied_message(action: str | None, resource: ProtectedResource | None, user: User | None) -> str: + """Build detailed error message for access denied scenarios.""" + if action and resource and user: + resource_info = f"{resource.type}::{resource.identifier}" + user_info = f"'{user.principal}'" + if user.attributes: + attrs = ", ".join([f"{k}={v}" for k, v in user.attributes.items()]) + user_info += f" (attributes: {attrs})" + + message = f"User {user_info} cannot perform action '{action}' on resource '{resource_info}'" + else: + message = "Insufficient permissions" + + return message diff --git a/llama_stack/distribution/configure.py b/llama_stack/distribution/configure.py index e58ea0338..35b216b30 100644 --- a/llama_stack/distribution/configure.py +++ b/llama_stack/distribution/configure.py @@ -17,6 +17,7 @@ from llama_stack.distribution.distribution import ( builtin_automatically_routed_apis, get_provider_registry, ) +from llama_stack.distribution.stack import replace_env_vars from llama_stack.distribution.utils.config_dirs import EXTERNAL_PROVIDERS_DIR from llama_stack.distribution.utils.dynamic import instantiate_class_type from llama_stack.distribution.utils.prompt_for_config import prompt_for_config @@ -163,7 +164,7 @@ def upgrade_from_routing_table( def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig: version = config_dict.get("version", None) if version == LLAMA_STACK_RUN_CONFIG_VERSION: - return StackRunConfig(**config_dict) + return StackRunConfig(**replace_env_vars(config_dict)) if "routing_table" in config_dict: logger.info("Upgrading config...") @@ -174,4 +175,4 @@ def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfi if not config_dict.get("external_providers_dir", None): config_dict["external_providers_dir"] = EXTERNAL_PROVIDERS_DIR - return StackRunConfig(**config_dict) + return StackRunConfig(**replace_env_vars(config_dict)) diff --git a/llama_stack/distribution/routing_tables/common.py b/llama_stack/distribution/routing_tables/common.py index b79c8a2a8..7f7de32fe 100644 --- a/llama_stack/distribution/routing_tables/common.py +++ b/llama_stack/distribution/routing_tables/common.py @@ -175,8 +175,9 @@ class CommonRoutingTableImpl(RoutingTable): return obj async def unregister_object(self, obj: RoutableObjectWithProvider) -> None: - if not is_action_allowed(self.policy, "delete", obj, get_authenticated_user()): - raise AccessDeniedError() + user = get_authenticated_user() + if not is_action_allowed(self.policy, "delete", obj, user): + raise AccessDeniedError("delete", obj, user) await self.dist_registry.delete(obj.type, obj.identifier) await unregister_object_from_provider(obj, self.impls_by_provider_id[obj.provider_id]) @@ -193,7 +194,7 @@ class 
CommonRoutingTableImpl(RoutingTable): # If object supports access control but no attributes set, use creator's attributes creator = get_authenticated_user() if not is_action_allowed(self.policy, "create", obj, creator): - raise AccessDeniedError() + raise AccessDeniedError("create", obj, creator) if creator: obj.owner = creator logger.info(f"Setting owner for {obj.type} '{obj.identifier}' to {obj.owner.principal}") diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index 83407a25f..681ab320d 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -9,6 +9,7 @@ import asyncio import functools import inspect import json +import logging import os import ssl import sys @@ -31,6 +32,7 @@ from openai import BadRequestError from pydantic import BaseModel, ValidationError from llama_stack.apis.common.responses import PaginatedResponse +from llama_stack.distribution.access_control.access_control import AccessDeniedError from llama_stack.distribution.datatypes import AuthenticationRequiredError, LoggingConfig, StackRunConfig from llama_stack.distribution.distribution import builtin_automatically_routed_apis from llama_stack.distribution.request_headers import PROVIDER_DATA_VAR, User, request_provider_data_context @@ -116,7 +118,7 @@ def translate_exception(exc: Exception) -> HTTPException | RequestValidationErro return HTTPException(status_code=400, detail=f"Invalid value: {str(exc)}") elif isinstance(exc, BadRequestError): return HTTPException(status_code=400, detail=str(exc)) - elif isinstance(exc, PermissionError): + elif isinstance(exc, PermissionError | AccessDeniedError): return HTTPException(status_code=403, detail=f"Permission denied: {str(exc)}") elif isinstance(exc, asyncio.TimeoutError | TimeoutError): return HTTPException(status_code=504, detail=f"Operation timed out: {str(exc)}") @@ -236,7 +238,10 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable: result.url = route return result except Exception as e: - logger.exception(f"Error executing endpoint {route=} {method=}") + if logger.isEnabledFor(logging.DEBUG): + logger.exception(f"Error executing endpoint {route=} {method=}") + else: + logger.error(f"Error executing endpoint {route=} {method=}: {str(e)}") raise translate_exception(e) from e sig = inspect.signature(func) diff --git a/llama_stack/distribution/store/registry.py b/llama_stack/distribution/store/registry.py index 0e84854c2..cd7cd9f00 100644 --- a/llama_stack/distribution/store/registry.py +++ b/llama_stack/distribution/store/registry.py @@ -10,11 +10,11 @@ from typing import Protocol import pydantic -from llama_stack.distribution.datatypes import KVStoreConfig, RoutableObjectWithProvider +from llama_stack.distribution.datatypes import RoutableObjectWithProvider from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig logger = get_logger(__name__, category="core") diff --git a/llama_stack/providers/inline/agents/meta_reference/persistence.py b/llama_stack/providers/inline/agents/meta_reference/persistence.py index 717387008..cda535937 100644 --- a/llama_stack/providers/inline/agents/meta_reference/persistence.py +++ 
b/llama_stack/providers/inline/agents/meta_reference/persistence.py @@ -53,7 +53,7 @@ class AgentPersistence: identifier=name, # should this be qualified in any way? ) if not is_action_allowed(self.policy, "create", session_info, user): - raise AccessDeniedError() + raise AccessDeniedError("create", session_info, user) await self.kvstore.set( key=f"session:{self.agent_id}:{session_id}", diff --git a/llama_stack/providers/registry/agents.py b/llama_stack/providers/registry/agents.py index 834e81b96..6f8c05a67 100644 --- a/llama_stack/providers/registry/agents.py +++ b/llama_stack/providers/registry/agents.py @@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]: "pillow", "pandas", "scikit-learn", + "mcp", ] + kvstore_dependencies(), # TODO make this dynamic based on the kvstore config module="llama_stack.providers.inline.agents.meta_reference", diff --git a/llama_stack/providers/utils/kvstore/config.py b/llama_stack/providers/utils/kvstore/config.py index a45ff4ce8..0219bbebe 100644 --- a/llama_stack/providers/utils/kvstore/config.py +++ b/llama_stack/providers/utils/kvstore/config.py @@ -36,15 +36,14 @@ class RedisKVStoreConfig(CommonConfig): def url(self) -> str: return f"redis://{self.host}:{self.port}" - @property - def pip_packages(self) -> list[str]: + @classmethod + def pip_packages(cls) -> list[str]: return ["redis"] @classmethod def sample_run_config(cls): return { "type": "redis", - "namespace": None, "host": "${env.REDIS_HOST:=localhost}", "port": "${env.REDIS_PORT:=6379}", } @@ -57,15 +56,14 @@ class SqliteKVStoreConfig(CommonConfig): description="File path for the sqlite database", ) - @property - def pip_packages(self) -> list[str]: + @classmethod + def pip_packages(cls) -> list[str]: return ["aiosqlite"] @classmethod def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"): return { "type": "sqlite", - "namespace": None, "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, } @@ -73,7 +71,7 @@ class SqliteKVStoreConfig(CommonConfig): class PostgresKVStoreConfig(CommonConfig): type: Literal[KVStoreType.postgres.value] = KVStoreType.postgres.value host: str = "localhost" - port: str = "5432" + port: int = 5432 db: str = "llamastack" user: str password: str | None = None @@ -83,7 +81,6 @@ class PostgresKVStoreConfig(CommonConfig): def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs): return { "type": "postgres", - "namespace": None, "host": "${env.POSTGRES_HOST:=localhost}", "port": "${env.POSTGRES_PORT:=5432}", "db": "${env.POSTGRES_DB:=llamastack}", @@ -108,8 +105,8 @@ class PostgresKVStoreConfig(CommonConfig): raise ValueError("Table name must be less than 63 characters") return v - @property - def pip_packages(self) -> list[str]: + @classmethod + def pip_packages(cls) -> list[str]: return ["psycopg2-binary"] @@ -122,15 +119,14 @@ class MongoDBKVStoreConfig(CommonConfig): password: str | None = None collection_name: str = "llamastack_kvstore" - @property - def pip_packages(self) -> list[str]: + @classmethod + def pip_packages(cls) -> list[str]: return ["pymongo"] @classmethod def sample_run_config(cls, collection_name: str = "llamastack_kvstore"): return { "type": "mongodb", - "namespace": None, "host": "${env.MONGODB_HOST:=localhost}", "port": "${env.MONGODB_PORT:=5432}", "db": "${env.MONGODB_DB}", @@ -144,3 +140,21 @@ KVStoreConfig = Annotated[ RedisKVStoreConfig | SqliteKVStoreConfig | PostgresKVStoreConfig | MongoDBKVStoreConfig, Field(discriminator="type", default=KVStoreType.sqlite.value), ] + + 
+def get_pip_packages(store_config: dict | KVStoreConfig) -> list[str]: + """Get pip packages for KV store config, handling both dict and object cases.""" + if isinstance(store_config, dict): + store_type = store_config.get("type") + if store_type == "sqlite": + return SqliteKVStoreConfig.pip_packages() + elif store_type == "postgres": + return PostgresKVStoreConfig.pip_packages() + elif store_type == "redis": + return RedisKVStoreConfig.pip_packages() + elif store_type == "mongodb": + return MongoDBKVStoreConfig.pip_packages() + else: + raise ValueError(f"Unknown KV store type: {store_type}") + else: + return store_config.pip_packages() diff --git a/llama_stack/providers/utils/sqlstore/sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlstore.py index d558a2a26..227c5abcd 100644 --- a/llama_stack/providers/utils/sqlstore/sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/sqlstore.py @@ -30,8 +30,8 @@ class SqlAlchemySqlStoreConfig(BaseModel): def engine_str(self) -> str: ... # TODO: move this when we have a better way to specify dependencies with internal APIs - @property - def pip_packages(self) -> list[str]: + @classmethod + def pip_packages(cls) -> list[str]: return ["sqlalchemy[asyncio]"] @@ -48,20 +48,20 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig): @classmethod def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"): - return cls( - type="sqlite", - db_path="${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, - ) + return { + "type": "sqlite", + "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name, + } - @property - def pip_packages(self) -> list[str]: - return super().pip_packages + ["aiosqlite"] + @classmethod + def pip_packages(cls) -> list[str]: + return super().pip_packages() + ["aiosqlite"] class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): type: Literal["postgres"] = SqlStoreType.postgres.value host: str = "localhost" - port: str = "5432" + port: int = 5432 db: str = "llamastack" user: str password: str | None = None @@ -70,20 +70,20 @@ class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): def engine_str(self) -> str: return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}" - @property - def pip_packages(self) -> list[str]: - return super().pip_packages + ["asyncpg"] + @classmethod + def pip_packages(cls) -> list[str]: + return super().pip_packages() + ["asyncpg"] @classmethod def sample_run_config(cls, **kwargs): - return cls( - type="postgres", - host="${env.POSTGRES_HOST:=localhost}", - port="${env.POSTGRES_PORT:=5432}", - db="${env.POSTGRES_DB:=llamastack}", - user="${env.POSTGRES_USER:=llamastack}", - password="${env.POSTGRES_PASSWORD:=llamastack}", - ) + return { + "type": "postgres", + "host": "${env.POSTGRES_HOST:=localhost}", + "port": "${env.POSTGRES_PORT:=5432}", + "db": "${env.POSTGRES_DB:=llamastack}", + "user": "${env.POSTGRES_USER:=llamastack}", + "password": "${env.POSTGRES_PASSWORD:=llamastack}", + } SqlStoreConfig = Annotated[ @@ -92,6 +92,20 @@ SqlStoreConfig = Annotated[ ] +def get_pip_packages(store_config: dict | SqlStoreConfig) -> list[str]: + """Get pip packages for SQL store config, handling both dict and object cases.""" + if isinstance(store_config, dict): + store_type = store_config.get("type") + if store_type == "sqlite": + return SqliteSqlStoreConfig.pip_packages() + elif store_type == "postgres": + return PostgresSqlStoreConfig.pip_packages() + else: + raise ValueError(f"Unknown SQL store type: {store_type}") + else: + return 
store_config.pip_packages() + + def sqlstore_impl(config: SqlStoreConfig) -> SqlStore: if config.type in [SqlStoreType.sqlite.value, SqlStoreType.postgres.value]: from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml index f12c5bec5..068278c66 100644 --- a/llama_stack/templates/bedrock/run.yaml +++ b/llama_stack/templates/bedrock/run.yaml @@ -21,7 +21,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/faiss_store.db safety: - provider_id: bedrock @@ -33,7 +32,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/agents_store.db responses_store: type: sqlite @@ -51,7 +49,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -59,14 +56,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml index c3877ddce..305e9a20f 100644 --- a/llama_stack/templates/cerebras/run.yaml +++ b/llama_stack/templates/cerebras/run.yaml @@ -31,7 +31,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/faiss_store.db agents: - provider_id: meta-reference @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/agents_store.db responses_store: type: sqlite @@ -50,7 +48,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -58,14 +55,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index a38d09324..5a68af3e6 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -36,7 +36,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db responses_store: type: sqlite @@ -54,7 +53,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -62,14 +60,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/dell/run-with-safety.yaml b/llama_stack/templates/dell/run-with-safety.yaml index 48639c772..1e1ef1ea9 100644 --- a/llama_stack/templates/dell/run-with-safety.yaml +++ b/llama_stack/templates/dell/run-with-safety.yaml @@ -39,7 +39,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db responses_store: type: sqlite @@ -57,7 +56,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +63,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/dell/run.yaml b/llama_stack/templates/dell/run.yaml index 13d43530b..6f5c56dd3 100644 --- a/llama_stack/templates/dell/run.yaml +++ b/llama_stack/templates/dell/run.yaml @@ -35,7 +35,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db responses_store: type: sqlite @@ -53,7 +52,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -61,14 +59,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index ecb53a18d..1233e2271 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -27,7 +27,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db safety: - provider_id: llama-guard @@ -45,7 +44,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db responses_store: type: sqlite @@ -63,7 +61,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -71,14 +68,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index 298d28d52..7f0bc49f5 100644 --- 
a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -27,7 +27,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db safety: - provider_id: llama-guard @@ -40,7 +39,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db responses_store: type: sqlite @@ -58,7 +56,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -66,14 +63,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml index 13bb65ed2..351ca74f7 100644 --- a/llama_stack/templates/groq/run.yaml +++ b/llama_stack/templates/groq/run.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/faiss_store.db safety: - provider_id: llama-guard @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/agents_store.db responses_store: type: sqlite @@ -57,7 +55,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +62,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml index b2bc6a8e9..63063ad91 100644 --- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml +++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml @@ -31,7 +31,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db safety: - provider_id: llama-guard @@ -44,7 +43,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db responses_store: type: sqlite @@ -62,7 +60,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -70,14 +67,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db scoring: - provider_id: basic diff --git 
a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml index d62921ccc..4caf0db04 100644 --- a/llama_stack/templates/hf-endpoint/run.yaml +++ b/llama_stack/templates/hf-endpoint/run.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db safety: - provider_id: llama-guard @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db responses_store: type: sqlite @@ -57,7 +55,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +62,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml index d7ff4f446..a4bba1f76 100644 --- a/llama_stack/templates/hf-serverless/run-with-safety.yaml +++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml @@ -31,7 +31,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db safety: - provider_id: llama-guard @@ -44,7 +43,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db responses_store: type: sqlite @@ -62,7 +60,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -70,14 +67,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml index 19484cba6..23e4c1f28 100644 --- a/llama_stack/templates/hf-serverless/run.yaml +++ b/llama_stack/templates/hf-serverless/run.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db safety: - provider_id: llama-guard @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db responses_store: type: sqlite @@ -57,7 +55,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +62,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db - 
provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/llama_api/run.yaml b/llama_stack/templates/llama_api/run.yaml index 3bfb284a3..77bbcfbc8 100644 --- a/llama_stack/templates/llama_api/run.yaml +++ b/llama_stack/templates/llama_api/run.yaml @@ -48,7 +48,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/agents_store.db responses_store: type: sqlite @@ -66,7 +65,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -74,14 +72,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml index 46b3a33a6..2f5ee4062 100644 --- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml @@ -41,7 +41,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db safety: - provider_id: llama-guard @@ -54,7 +53,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db responses_store: type: sqlite @@ -72,7 +70,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -80,14 +77,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml index 033ec245a..cc119bf4d 100644 --- a/llama_stack/templates/meta-reference-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-gpu/run.yaml @@ -31,7 +31,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db safety: - provider_id: llama-guard @@ -44,7 +43,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db responses_store: type: sqlite @@ -62,7 +60,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -70,14 +67,12 @@ providers: config: kvstore: type: sqlite - namespace: null 
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/nvidia/run-with-safety.yaml b/llama_stack/templates/nvidia/run-with-safety.yaml index 73783be98..7dcfd196d 100644 --- a/llama_stack/templates/nvidia/run-with-safety.yaml +++ b/llama_stack/templates/nvidia/run-with-safety.yaml @@ -30,7 +30,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db safety: - provider_id: nvidia @@ -44,7 +43,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db responses_store: type: sqlite @@ -75,7 +73,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db - provider_id: nvidia provider_type: remote::nvidia diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index af9d5904a..f69270fb5 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -25,7 +25,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db safety: - provider_id: nvidia @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db responses_store: type: sqlite diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index bad51de09..98db5fc98 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -25,7 +25,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard @@ -40,7 +39,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db responses_store: type: sqlite @@ -58,7 +56,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -66,14 +63,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index e1dea730e..38fb2bace 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -25,7 +25,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db safety: - provider_id: llama-guard @@ -38,7 +37,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db responses_store: type: sqlite @@ -56,7 +54,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -64,14 +61,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/open-benchmark/run.yaml b/llama_stack/templates/open-benchmark/run.yaml index 57ae6b9be..7b1ef8f10 100644 --- a/llama_stack/templates/open-benchmark/run.yaml +++ b/llama_stack/templates/open-benchmark/run.yaml @@ -62,7 +62,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db responses_store: type: sqlite @@ -80,7 +79,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -88,14 +86,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/passthrough/run-with-safety.yaml b/llama_stack/templates/passthrough/run-with-safety.yaml index 7a30f665c..5cd8a2930 100644 --- a/llama_stack/templates/passthrough/run-with-safety.yaml +++ b/llama_stack/templates/passthrough/run-with-safety.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db safety: - provider_id: llama-guard @@ -44,7 +43,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db responses_store: type: sqlite @@ -62,7 +60,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -70,14 +67,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/passthrough/run.yaml b/llama_stack/templates/passthrough/run.yaml index dc751ea20..5b6078953 100644 --- a/llama_stack/templates/passthrough/run.yaml +++ b/llama_stack/templates/passthrough/run.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db safety: - provider_id: llama-guard @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: 
${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db responses_store: type: sqlite @@ -57,7 +55,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +62,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/postgres-demo/postgres_demo.py b/llama_stack/templates/postgres-demo/postgres_demo.py index 67ad35db7..ed69c22db 100644 --- a/llama_stack/templates/postgres-demo/postgres_demo.py +++ b/llama_stack/templates/postgres-demo/postgres_demo.py @@ -114,7 +114,7 @@ def get_distribution_template() -> DistributionTemplate: provider_id="meta-reference", provider_type="inline::meta-reference", config=dict( - service_name="${env.OTEL_SERVICE_NAME:=}", + service_name="${env.OTEL_SERVICE_NAME:=\u200b}", sinks="${env.TELEMETRY_SINKS:=console,otel_trace}", otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}", ), diff --git a/llama_stack/templates/postgres-demo/run.yaml b/llama_stack/templates/postgres-demo/run.yaml index dd20cc6ac..2b6b1a64f 100644 --- a/llama_stack/templates/postgres-demo/run.yaml +++ b/llama_stack/templates/postgres-demo/run.yaml @@ -51,7 +51,7 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - service_name: ${env.OTEL_SERVICE_NAME:=} + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" sinks: ${env.TELEMETRY_SINKS:=console,otel_trace} otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces} tool_runtime: diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml index 78fb22d38..a8d30904d 100644 --- a/llama_stack/templates/remote-vllm/run-with-safety.yaml +++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml @@ -35,7 +35,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db safety: - provider_id: llama-guard @@ -48,7 +47,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db responses_store: type: sqlite @@ -59,7 +57,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -67,14 +64,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml index 1cc4596f3..58c4f867d 100644 --- a/llama_stack/templates/remote-vllm/run.yaml +++ b/llama_stack/templates/remote-vllm/run.yaml @@ -28,7 +28,6 @@ providers: config: kvstore: type: sqlite - namespace: null 
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db safety: - provider_id: llama-guard @@ -41,7 +40,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db responses_store: type: sqlite @@ -52,7 +50,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -60,14 +57,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml index 6163a58b3..ab6c70ae0 100644 --- a/llama_stack/templates/sambanova/run.yaml +++ b/llama_stack/templates/sambanova/run.yaml @@ -23,7 +23,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/faiss_store.db - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb @@ -49,7 +48,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/agents_store.db responses_store: type: sqlite diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml index 190030690..de8d35683 100644 --- a/llama_stack/templates/starter/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -66,7 +66,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db - provider_id: ${env.ENABLE_SQLITE_VEC:+sqlite-vec} provider_type: inline::sqlite-vec @@ -78,7 +77,6 @@ providers: db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db - provider_id: ${env.ENABLE_CHROMADB:+chromadb} provider_type: remote::chromadb @@ -111,7 +109,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db responses_store: type: sqlite @@ -129,7 +126,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -137,14 +133,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index 7914d4298..2a982bb62 100644 --- a/llama_stack/templates/starter/starter.py +++ b/llama_stack/templates/starter/starter.py @@ -234,7 +234,6 @@ def get_distribution_template() -> DistributionTemplate: default_models = get_model_registry(available_models) - postgres_store = 
PostgresSqlStoreConfig.sample_run_config() return DistributionTemplate( name=name, distro_type="self_hosted", @@ -243,7 +242,7 @@ def get_distribution_template() -> DistributionTemplate: template_path=None, providers=providers, available_models_by_provider=available_models, - additional_pip_packages=postgres_store.pip_packages, + additional_pip_packages=PostgresSqlStoreConfig.pip_packages(), run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index 7badff140..dceb13c8b 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -15,6 +15,7 @@ from pydantic import BaseModel, Field from llama_stack.apis.datasets import DatasetPurpose from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ( + LLAMA_STACK_RUN_CONFIG_VERSION, Api, BenchmarkInput, BuildConfig, @@ -23,14 +24,15 @@ from llama_stack.distribution.datatypes import ( ModelInput, Provider, ShieldInput, - StackRunConfig, ToolGroupInput, ) from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.utils.dynamic import instantiate_class_type from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry -from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig +from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore.config import get_pip_packages as get_kv_pip_packages +from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages def get_model_registry( @@ -87,21 +89,24 @@ class RunConfigSettings(BaseModel): default_tool_groups: list[ToolGroupInput] | None = None default_datasets: list[DatasetInput] | None = None default_benchmarks: list[BenchmarkInput] | None = None - metadata_store: KVStoreConfig | None = None - inference_store: SqlStoreConfig | None = None + metadata_store: dict | None = None + inference_store: dict | None = None def run_config( self, name: str, providers: dict[str, list[str]], container_image: str | None = None, - ) -> StackRunConfig: + ) -> dict: provider_registry = get_provider_registry() provider_configs = {} for api_str, provider_types in providers.items(): if api_providers := self.provider_overrides.get(api_str): - provider_configs[api_str] = api_providers + # Convert Provider objects to dicts for YAML serialization + provider_configs[api_str] = [ + p.model_dump(exclude_none=True) if isinstance(p, Provider) else p for p in api_providers + ] continue provider_configs[api_str] = [] @@ -128,33 +133,40 @@ class RunConfigSettings(BaseModel): provider_id=provider_id, provider_type=provider_type, config=config, - ) + ).model_dump(exclude_none=True) ) # Get unique set of APIs from providers apis = sorted(providers.keys()) - return StackRunConfig( - image_name=name, - container_image=container_image, - apis=apis, - providers=provider_configs, - metadata_store=self.metadata_store + # Return a dict that matches StackRunConfig structure + return { + "version": LLAMA_STACK_RUN_CONFIG_VERSION, + "image_name": name, + "container_image": container_image, + "apis": apis, + "providers": provider_configs, + "metadata_store": self.metadata_store or SqliteKVStoreConfig.sample_run_config( 
__distro_dir__=f"~/.llama/distributions/{name}", db_name="registry.db", ), - inference_store=self.inference_store + "inference_store": self.inference_store or SqliteSqlStoreConfig.sample_run_config( __distro_dir__=f"~/.llama/distributions/{name}", db_name="inference_store.db", ), - models=self.default_models or [], - shields=self.default_shields or [], - tool_groups=self.default_tool_groups or [], - datasets=self.default_datasets or [], - benchmarks=self.default_benchmarks or [], - ) + "models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])], + "shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])], + "vector_dbs": [], + "datasets": [d.model_dump(exclude_none=True) for d in (self.default_datasets or [])], + "scoring_fns": [], + "benchmarks": [b.model_dump(exclude_none=True) for b in (self.default_benchmarks or [])], + "tool_groups": [t.model_dump(exclude_none=True) for t in (self.default_tool_groups or [])], + "server": { + "port": 8321, + }, + } class DistributionTemplate(BaseModel): @@ -190,10 +202,12 @@ class DistributionTemplate(BaseModel): # TODO: This is a hack to get the dependencies for internal APIs into build # We should have a better way to do this by formalizing the concept of "internal" APIs # and providers, with a way to specify dependencies for them. - if run_config_.inference_store: - additional_pip_packages.extend(run_config_.inference_store.pip_packages) - if run_config_.metadata_store: - additional_pip_packages.extend(run_config_.metadata_store.pip_packages) + + if run_config_.get("inference_store"): + additional_pip_packages.extend(get_sql_pip_packages(run_config_["inference_store"])) + + if run_config_.get("metadata_store"): + additional_pip_packages.extend(get_kv_pip_packages(run_config_["metadata_store"])) if self.additional_pip_packages: additional_pip_packages.extend(self.additional_pip_packages) @@ -286,7 +300,7 @@ class DistributionTemplate(BaseModel): run_config = settings.run_config(self.name, self.providers, self.container_image) with open(yaml_output_dir / yaml_pth, "w") as f: yaml.safe_dump( - run_config.model_dump(exclude_none=True), + {k: v for k, v in run_config.items() if v is not None}, f, sort_keys=False, ) diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml index c4f9ae7ef..c19b916d5 100644 --- a/llama_stack/templates/tgi/run-with-safety.yaml +++ b/llama_stack/templates/tgi/run-with-safety.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db safety: - provider_id: llama-guard @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db responses_store: type: sqlite @@ -57,7 +55,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +62,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml index 
70e5872b3..f0197d74c 100644 --- a/llama_stack/templates/tgi/run.yaml +++ b/llama_stack/templates/tgi/run.yaml @@ -25,7 +25,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db safety: - provider_id: llama-guard @@ -38,7 +37,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db responses_store: type: sqlite @@ -56,7 +54,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -64,14 +61,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml index 14f423855..b32c9ee8d 100644 --- a/llama_stack/templates/together/run-with-safety.yaml +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db safety: - provider_id: llama-guard @@ -44,7 +43,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db responses_store: type: sqlite @@ -62,7 +60,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -70,14 +67,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index 38f1922c0..22c99f6cf 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -26,7 +26,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db safety: - provider_id: llama-guard @@ -39,7 +38,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db responses_store: type: sqlite @@ -57,7 +55,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -65,14 +62,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db scoring: - provider_id: basic diff --git 
a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml index 6854ad05c..6d122e180 100644 --- a/llama_stack/templates/vllm-gpu/run.yaml +++ b/llama_stack/templates/vllm-gpu/run.yaml @@ -30,7 +30,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/faiss_store.db safety: - provider_id: llama-guard @@ -43,7 +42,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/agents_store.db responses_store: type: sqlite @@ -61,7 +59,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -69,14 +66,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/llama_stack/templates/watsonx/run.yaml b/llama_stack/templates/watsonx/run.yaml index 8b8fc09c4..d80ee6329 100644 --- a/llama_stack/templates/watsonx/run.yaml +++ b/llama_stack/templates/watsonx/run.yaml @@ -27,7 +27,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db safety: - provider_id: llama-guard @@ -40,7 +39,6 @@ providers: config: persistence_store: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db responses_store: type: sqlite @@ -58,7 +56,6 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db datasetio: - provider_id: huggingface @@ -66,14 +63,12 @@ providers: config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db - provider_id: localfs provider_type: inline::localfs config: kvstore: type: sqlite - namespace: null db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db scoring: - provider_id: basic diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index 2d6092e44..ecd29484b 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -37,26 +37,42 @@ def is_port_available(port: int, host: str = "localhost") -> bool: def start_llama_stack_server(config_name: str) -> subprocess.Popen: """Start a llama stack server with the given config.""" cmd = ["llama", "stack", "run", config_name] - - # Start server in background - process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + devnull = open(os.devnull, "w") + process = subprocess.Popen( + cmd, + stdout=devnull, # redirect stdout to devnull to prevent deadlock + stderr=devnull, # redirect stderr to devnull to prevent deadlock + text=True, + env={**os.environ, "LLAMA_STACK_LOG_FILE": "server.log"}, + ) return process -def wait_for_server_ready(base_url: str, timeout: int = 120) -> bool: +def wait_for_server_ready(base_url: str, timeout: int = 30, process: subprocess.Popen | None = None) -> bool: """Wait for the server to be ready by polling the health endpoint.""" health_url = 
f"{base_url}/v1/health" start_time = time.time() while time.time() - start_time < timeout: + if process and process.poll() is not None: + print(f"Server process terminated with return code: {process.returncode}") + return False + try: response = requests.get(health_url, timeout=5) if response.status_code == 200: return True except (requests.exceptions.ConnectionError, requests.exceptions.Timeout): pass + + # Print progress every 5 seconds + elapsed = time.time() - start_time + if int(elapsed) % 5 == 0 and elapsed > 0: + print(f"Waiting for server at {base_url}... ({elapsed:.1f}s elapsed)") + time.sleep(0.5) + print(f"Server failed to respond within {timeout} seconds") return False @@ -179,11 +195,12 @@ def llama_stack_client(request, provider_data): server_process = start_llama_stack_server(config_name) # Wait for server to be ready - if not wait_for_server_ready(base_url, timeout=120): + if not wait_for_server_ready(base_url, timeout=30, process=server_process): print("Server failed to start within timeout") server_process.terminate() raise RuntimeError( - f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid." + f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid. " + f"See server.log for details." ) print(f"Server is ready at {base_url}") @@ -227,3 +244,31 @@ def llama_stack_client(request, provider_data): def openai_client(client_with_models): base_url = f"{client_with_models.base_url}/v1/openai/v1" return OpenAI(base_url=base_url, api_key="fake") + + +@pytest.fixture(scope="session", autouse=True) +def cleanup_server_process(request): + """Cleanup server process at the end of the test session.""" + yield # Run tests + + if hasattr(request.session, "_llama_stack_server_process"): + server_process = request.session._llama_stack_server_process + if server_process: + if server_process.poll() is None: + print("Terminating llama stack server process...") + else: + print(f"Server process already terminated with return code: {server_process.returncode}") + return + try: + server_process.terminate() + server_process.wait(timeout=10) + print("Server process terminated gracefully") + except subprocess.TimeoutExpired: + print("Server process did not terminate gracefully, killing it") + server_process.kill() + server_process.wait() + print("Server process killed") + except Exception as e: + print(f"Error during server cleanup: {e}") + else: + print("Server process not found - won't be able to cleanup") diff --git a/tests/unit/fixtures.py b/tests/unit/fixtures.py index 7174d2e78..4e50c5e08 100644 --- a/tests/unit/fixtures.py +++ b/tests/unit/fixtures.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import pytest +import pytest_asyncio from llama_stack.distribution.store.registry import CachedDiskDistributionRegistry, DiskDistributionRegistry from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig from llama_stack.providers.utils.kvstore.sqlite import SqliteKVStoreImpl -@pytest.fixture(scope="function") +@pytest_asyncio.fixture(scope="function") async def sqlite_kvstore(tmp_path): db_path = tmp_path / "test_kv.db" kvstore_config = SqliteKVStoreConfig(db_path=db_path.as_posix()) @@ -20,14 +20,14 @@ async def sqlite_kvstore(tmp_path): yield kvstore -@pytest.fixture(scope="function") +@pytest_asyncio.fixture(scope="function") async def disk_dist_registry(sqlite_kvstore): registry = DiskDistributionRegistry(sqlite_kvstore) await registry.initialize() yield registry -@pytest.fixture(scope="function") +@pytest_asyncio.fixture(scope="function") async def cached_disk_dist_registry(sqlite_kvstore): registry = CachedDiskDistributionRegistry(sqlite_kvstore) await registry.initialize() diff --git a/tests/unit/providers/agents/test_persistence_access_control.py b/tests/unit/providers/agents/test_persistence_access_control.py index d5b876a09..656d1e53c 100644 --- a/tests/unit/providers/agents/test_persistence_access_control.py +++ b/tests/unit/providers/agents/test_persistence_access_control.py @@ -9,6 +9,7 @@ from datetime import datetime from unittest.mock import patch import pytest +import pytest_asyncio from llama_stack.apis.agents import Turn from llama_stack.apis.inference import CompletionMessage, StopReason @@ -16,7 +17,7 @@ from llama_stack.distribution.datatypes import User from llama_stack.providers.inline.agents.meta_reference.persistence import AgentPersistence, AgentSessionInfo -@pytest.fixture +@pytest_asyncio.fixture async def test_setup(sqlite_kvstore): agent_persistence = AgentPersistence(agent_id="test_agent", kvstore=sqlite_kvstore, policy={}) yield agent_persistence diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py index bbac717c7..5d9d92cf3 100644 --- a/tests/unit/providers/vector_io/test_sqlite_vec.py +++ b/tests/unit/providers/vector_io/test_sqlite_vec.py @@ -148,7 +148,7 @@ async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks, embedding_dime assert len(chunk_ids) == len(set(chunk_ids)), "Duplicate chunk IDs detected across batches!" 
-@pytest.fixture(scope="session") +@pytest_asyncio.fixture(scope="session") async def sqlite_vec_adapter(sqlite_connection): config = type("Config", (object,), {"db_path": ":memory:"}) # Mock config with in-memory database adapter = SQLiteVecVectorIOAdapter(config=config, inference_api=None) diff --git a/tests/unit/server/test_access_control.py b/tests/unit/server/test_access_control.py index f9ad47b0c..af03ddacb 100644 --- a/tests/unit/server/test_access_control.py +++ b/tests/unit/server/test_access_control.py @@ -7,6 +7,7 @@ from unittest.mock import MagicMock, Mock, patch import pytest +import pytest_asyncio import yaml from pydantic import TypeAdapter, ValidationError @@ -26,7 +27,7 @@ def _return_model(model): return model -@pytest.fixture +@pytest_asyncio.fixture async def test_setup(cached_disk_dist_registry): mock_inference = Mock() mock_inference.__provider_spec__ = MagicMock() @@ -245,7 +246,7 @@ async def test_automatic_access_attributes(mock_get_authenticated_user, test_set assert model.identifier == "auto-access-model" -@pytest.fixture +@pytest_asyncio.fixture async def test_setup_with_access_policy(cached_disk_dist_registry): mock_inference = Mock() mock_inference.__provider_spec__ = MagicMock() diff --git a/tests/unit/server/test_server.py b/tests/unit/server/test_server.py new file mode 100644 index 000000000..d17d58b8a --- /dev/null +++ b/tests/unit/server/test_server.py @@ -0,0 +1,187 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from unittest.mock import Mock + +from fastapi import HTTPException +from openai import BadRequestError +from pydantic import ValidationError + +from llama_stack.distribution.access_control.access_control import AccessDeniedError +from llama_stack.distribution.datatypes import AuthenticationRequiredError +from llama_stack.distribution.server.server import translate_exception + + +class TestTranslateException: + """Test cases for the translate_exception function.""" + + def test_translate_access_denied_error(self): + """Test that AccessDeniedError is translated to 403 HTTP status.""" + exc = AccessDeniedError() + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 403 + assert result.detail == "Permission denied: Insufficient permissions" + + def test_translate_access_denied_error_with_context(self): + """Test that AccessDeniedError with context includes detailed information.""" + from llama_stack.distribution.datatypes import User + + # Create mock user and resource + user = User("test-user", {"roles": ["user"], "teams": ["dev"]}) + + # Create a simple mock object that implements the ProtectedResource protocol + class MockResource: + def __init__(self, type: str, identifier: str, owner=None): + self.type = type + self.identifier = identifier + self.owner = owner + + resource = MockResource("vector_db", "test-db") + + exc = AccessDeniedError("create", resource, user) + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 403 + assert "test-user" in result.detail + assert "vector_db::test-db" in result.detail + assert "create" in result.detail + assert "roles=['user']" in result.detail + assert "teams=['dev']" in result.detail + + def test_translate_permission_error(self): + """Test that PermissionError is translated to 403 HTTP status.""" + exc = 
PermissionError("Permission denied") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 403 + assert result.detail == "Permission denied: Permission denied" + + def test_translate_value_error(self): + """Test that ValueError is translated to 400 HTTP status.""" + exc = ValueError("Invalid input") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 400 + assert result.detail == "Invalid value: Invalid input" + + def test_translate_bad_request_error(self): + """Test that BadRequestError is translated to 400 HTTP status.""" + # Create a mock response for BadRequestError + mock_response = Mock() + mock_response.status_code = 400 + mock_response.headers = {} + + exc = BadRequestError("Bad request", response=mock_response, body="Bad request") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 400 + assert result.detail == "Bad request" + + def test_translate_authentication_required_error(self): + """Test that AuthenticationRequiredError is translated to 401 HTTP status.""" + exc = AuthenticationRequiredError("Authentication required") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 401 + assert result.detail == "Authentication required: Authentication required" + + def test_translate_timeout_error(self): + """Test that TimeoutError is translated to 504 HTTP status.""" + exc = TimeoutError("Operation timed out") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 504 + assert result.detail == "Operation timed out: Operation timed out" + + def test_translate_asyncio_timeout_error(self): + """Test that asyncio.TimeoutError is translated to 504 HTTP status.""" + exc = TimeoutError() + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 504 + assert result.detail == "Operation timed out: " + + def test_translate_not_implemented_error(self): + """Test that NotImplementedError is translated to 501 HTTP status.""" + exc = NotImplementedError("Not implemented") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 501 + assert result.detail == "Not implemented: Not implemented" + + def test_translate_validation_error(self): + """Test that ValidationError is translated to 400 HTTP status with proper format.""" + # Create a mock validation error using proper Pydantic error format + exc = ValidationError.from_exception_data( + "TestModel", + [ + { + "loc": ("field", "nested"), + "msg": "field required", + "type": "missing", + } + ], + ) + + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 400 + assert "errors" in result.detail + assert len(result.detail["errors"]) == 1 + assert result.detail["errors"][0]["loc"] == ["field", "nested"] + assert result.detail["errors"][0]["msg"] == "Field required" + assert result.detail["errors"][0]["type"] == "missing" + + def test_translate_generic_exception(self): + """Test that generic exceptions are translated to 500 HTTP status.""" + exc = Exception("Unexpected error") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 500 + assert result.detail == "Internal server error: An unexpected error occurred." 
+ + def test_translate_runtime_error(self): + """Test that RuntimeError is translated to 500 HTTP status.""" + exc = RuntimeError("Runtime error") + result = translate_exception(exc) + + assert isinstance(result, HTTPException) + assert result.status_code == 500 + assert result.detail == "Internal server error: An unexpected error occurred." + + def test_multiple_access_denied_scenarios(self): + """Test various scenarios that should result in 403 status codes.""" + # Test AccessDeniedError (uses enhanced message) + exc1 = AccessDeniedError() + result1 = translate_exception(exc1) + assert isinstance(result1, HTTPException) + assert result1.status_code == 403 + assert result1.detail == "Permission denied: Insufficient permissions" + + # Test PermissionError (uses generic message) + exc2 = PermissionError("No permission") + result2 = translate_exception(exc2) + assert isinstance(result2, HTTPException) + assert result2.status_code == 403 + assert result2.detail == "Permission denied: No permission" + + exc3 = PermissionError("Access denied") + result3 = translate_exception(exc3) + assert isinstance(result3, HTTPException) + assert result3.status_code == 403 + assert result3.detail == "Permission denied: Access denied"
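

A few implementation notes on the diff above, with illustrative sketches. The new `get_pip_packages` helpers exist because `sample_run_config()` now returns plain dicts (with unexpanded `${env.…}` placeholders) rather than validated config objects, so dependency lookup has to accept either shape. A minimal usage sketch for the SQL-store variant, importing from the module patched above (the expected return values are read directly off the diff):

```python
from llama_stack.providers.utils.sqlstore.sqlstore import (
    SqliteSqlStoreConfig,
    get_pip_packages,
)

# Dict case: the shape sample_run_config() now emits.
assert get_pip_packages({"type": "sqlite"}) == ["sqlalchemy[asyncio]", "aiosqlite"]

# Object case: a validated config dispatches to the same classmethod.
cfg = SqliteSqlStoreConfig(db_path="/tmp/store.db")
assert get_pip_packages(cfg) == ["sqlalchemy[asyncio]", "aiosqlite"]

# Unknown types fail loudly rather than silently returning nothing.
try:
    get_pip_packages({"type": "mysql"})
except ValueError as e:
    print(e)  # Unknown SQL store type: mysql
```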
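Switching `pip_packages` from a `@property` to a `@classmethod` lets build-time code ask for dependencies without constructing a config at all — note that `PostgresSqlStoreConfig` requires `user`, which is exactly why `starter.py` above stops instantiating one. Zero-argument `super()` works inside classmethods, so the subclass pattern carries over unchanged; a distilled sketch:

```python
from pydantic import BaseModel


class SqlAlchemyConfig(BaseModel):
    @classmethod
    def pip_packages(cls) -> list[str]:
        return ["sqlalchemy[asyncio]"]


class SqliteConfig(SqlAlchemyConfig):
    @classmethod
    def pip_packages(cls) -> list[str]:
        # zero-argument super() binds cls correctly inside a classmethod
        return super().pip_packages() + ["aiosqlite"]


# No instance needed -- usable at template/build time:
assert SqliteConfig.pip_packages() == ["sqlalchemy[asyncio]", "aiosqlite"]
```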
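Returning dicts from `sample_run_config()` also sidesteps a typing conflict the same diff introduces: `PostgresSqlStoreConfig.port` is now an `int`, but the sample still emits the string placeholder `"${env.POSTGRES_PORT:=5432}"`, which only becomes an integer after env substitution at load time. Validating the sample eagerly would therefore fail; a small demonstration of the conflict (field name mirrors the config above):

```python
from pydantic import BaseModel, ValidationError


class PostgresCfg(BaseModel):
    port: int = 5432


sample = {"port": "${env.POSTGRES_PORT:=5432}"}
try:
    PostgresCfg(**sample)
except ValidationError:
    print("placeholder is not an int until env substitution runs")
```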
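The blanket removal of `namespace: null` across every `run.yaml` falls straight out of the serialization change: provider configs are now emitted via `model_dump(exclude_none=True)`, and the generator's final `yaml.safe_dump` applies the same filter at the top level (`{k: v for k, v in run_config.items() if v is not None}`), so `container_image: null` and friends disappear as well. Roughly (field names mirror the kvstore blocks above):

```python
from pydantic import BaseModel


class KVStoreCfg(BaseModel):
    type: str = "sqlite"
    namespace: str | None = None
    db_path: str


cfg = KVStoreCfg(db_path="${env.SQLITE_STORE_DIR:=~/.llama}/faiss_store.db")
print(cfg.model_dump(exclude_none=True))
# {'type': 'sqlite', 'db_path': '${env.SQLITE_STORE_DIR:=~/.llama}/faiss_store.db'}
```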
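The `\u200b` in the `OTEL_SERVICE_NAME` default is a zero-width space standing in for an empty string. Presumably `${env.VAR:=}` cannot express "default to empty" in the substitution grammar (or round-trips ambiguously through YAML), so an invisible-but-nonempty character is used instead. A hypothetical substitution helper in that spirit — the real parser lives elsewhere in llama_stack and may differ:

```python
import os
import re

# Hypothetical: matches ${env.NAME:=default} and substitutes from os.environ.
_PATTERN = re.compile(r"\$\{env\.(\w+):=(.*?)\}")


def substitute(value: str) -> str:
    return _PATTERN.sub(lambda m: os.environ.get(m.group(1), m.group(2)), value)


# With the variable unset, the "empty" default is really U+200B:
print(repr(substitute("${env.OTEL_SERVICE_NAME:=\u200b}")))  # '\u200b'
```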
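On the integration fixtures: redirecting the server's stdout/stderr to devnull avoids a classic deadlock — with `stdout=subprocess.PIPE` and no reader, the OS pipe buffer eventually fills and the child blocks on `write()` forever. `subprocess.DEVNULL` expresses the same intent without leaving a file handle open; a sketch (the fixture's manual `open(os.devnull, "w")` works too, and `LLAMA_STACK_LOG_FILE` comes from the diff itself):

```python
import os
import subprocess

process = subprocess.Popen(
    ["llama", "stack", "run", "ollama"],
    stdout=subprocess.DEVNULL,  # no reader during the test session, so PIPE
    stderr=subprocess.DEVNULL,  # would eventually fill up and block the server
    env={**os.environ, "LLAMA_STACK_LOG_FILE": "server.log"},
)
```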
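Passing the `Popen` handle into `wait_for_server_ready` is what makes startup failures cheap: a crashed server is detected on the next poll instead of consuming the whole 30-second timeout. The pattern, stated generically:

```python
import subprocess
import time
from collections.abc import Callable


def wait_until_healthy(proc: subprocess.Popen, probe: Callable[[], bool], timeout: float = 30.0) -> bool:
    """Fail-fast readiness loop: give up as soon as the child process dies."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if proc.poll() is not None:  # child exited -> fail fast
            return False
        if probe():  # e.g. GET {base_url}/v1/health returns 200
            return True
        time.sleep(0.5)
    return False
```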
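The session-scoped `cleanup_server_process` fixture uses the standard terminate-then-kill escalation; distilled to its core:

```python
import subprocess


def shutdown(proc: subprocess.Popen, grace: float = 10.0) -> None:
    """Terminate-then-kill escalation used by the session cleanup fixture."""
    if proc.poll() is not None:
        return  # already exited
    proc.terminate()  # polite SIGTERM first
    try:
        proc.wait(timeout=grace)
    except subprocess.TimeoutExpired:
        proc.kill()  # SIGKILL if it ignored the request
        proc.wait()
```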
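The `pytest.fixture` → `pytest_asyncio.fixture` swap in the unit tests matters because, in pytest-asyncio's strict mode (the default in recent releases), a plain `@pytest.fixture` around an `async def` generator is not run on the event loop — tests would receive the raw async generator object instead of the yielded value. A self-contained example, assuming `pytest-asyncio` is installed:

```python
import asyncio

import pytest
import pytest_asyncio


@pytest_asyncio.fixture
async def queue():
    q: asyncio.Queue[int] = asyncio.Queue()
    await q.put(1)  # async setup actually awaits under pytest_asyncio.fixture
    yield q


@pytest.mark.asyncio
async def test_queue(queue):
    assert await queue.get() == 1
```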
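Finally, the new `test_server.py` pins down the exception-to-status contract of `translate_exception`: `AccessDeniedError` and `PermissionError` become 403, `ValueError` and OpenAI's `BadRequestError` become 400, `AuthenticationRequiredError` 401, `TimeoutError` 504, `NotImplementedError` 501, `ValidationError` a structured 400 body, and anything else an opaque 500. Read off the assertions, the mapping looks like the sketch below — a reconstruction from the tests, not the server's actual implementation; note that `AccessDeniedError` additionally enriches the 403 detail with user/resource/action context:

```python
from fastapi import HTTPException


def translate_exception_sketch(exc: Exception) -> HTTPException:
    if isinstance(exc, PermissionError):
        return HTTPException(403, detail=f"Permission denied: {exc}")
    if isinstance(exc, ValueError):
        return HTTPException(400, detail=f"Invalid value: {exc}")
    if isinstance(exc, TimeoutError):  # covers asyncio.TimeoutError on 3.11+
        return HTTPException(504, detail=f"Operation timed out: {exc}")
    if isinstance(exc, NotImplementedError):
        return HTTPException(501, detail=f"Not implemented: {exc}")
    # Unexpected errors are deliberately opaque to the client:
    return HTTPException(500, detail="Internal server error: An unexpected error occurred.")
```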