Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-29 15:23:51 +00:00)

Commit bae3c766bc: Merge branch 'main' into add-llama-guard-4-model

72 changed files with 990 additions and 337 deletions
.github/workflows/integration-tests.yml (vendored): 41 changes

@@ -25,7 +25,7 @@ jobs:
         # Listing tests manually since some of them currently fail
         # TODO: generate matrix list from tests/integration when fixed
         test-type: [agents, inference, datasets, inspect, scoring, post_training, providers, tool_runtime, vector_io]
-        client-type: [library, http]
+        client-type: [library, server]
         python-version: ["3.12", "3.13"]
       fail-fast: false # we want to run all tests regardless of failure

@@ -45,39 +45,6 @@ jobs:
        run: |
          uv run llama stack build --template ollama --image-type venv

-      - name: Start Llama Stack server in background
-        if: matrix.client-type == 'http'
-        env:
-          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
-        run: |
-          LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv --env OLLAMA_URL="http://0.0.0.0:11434" &
-
-      - name: Wait for Llama Stack server to be ready
-        if: matrix.client-type == 'http'
-        run: |
-          echo "Waiting for Llama Stack server..."
-          for i in {1..30}; do
-            if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
-              echo "Llama Stack server is up!"
-              exit 0
-            fi
-            sleep 1
-          done
-          echo "Llama Stack server failed to start"
-          cat server.log
-          exit 1
-
-      - name: Verify Ollama status is OK
-        if: matrix.client-type == 'http'
-        run: |
-          echo "Verifying Ollama status..."
-          ollama_status=$(curl -s -L http://127.0.0.1:8321/v1/providers/ollama|jq --raw-output .health.status)
-          echo "Ollama status: $ollama_status"
-          if [ "$ollama_status" != "OK" ]; then
-            echo "Ollama health check failed"
-            exit 1
-          fi
-
       - name: Check Storage and Memory Available Before Tests
         if: ${{ always() }}
         run: |

@@ -92,12 +59,14 @@ jobs:
          if [ "${{ matrix.client-type }}" == "library" ]; then
            stack_config="ollama"
          else
-           stack_config="http://localhost:8321"
+           stack_config="server:ollama"
          fi
          uv run pytest -s -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \
            -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
            --text-model="meta-llama/Llama-3.2-3B-Instruct" \
-           --embedding-model=all-MiniLM-L6-v2
+           --embedding-model=all-MiniLM-L6-v2 \
+           --color=yes \
+           --capture=tee-sys | tee pytest-${{ matrix.test-type }}.log

       - name: Check Storage and Memory Available After Tests
         if: ${{ always() }}
README.md: 71 changes

@@ -35,6 +35,8 @@ pip install llama-stack-client
### CLI
```bash
# Run a chat completion
+MODEL="Llama-4-Scout-17B-16E-Instruct"
+
llama-stack-client --endpoint http://localhost:8321 \
  inference chat-completion \
  --model-id meta-llama/$MODEL \
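For readers who prefer the Python SDK over the CLI shown in the hunk above, a rough equivalent looks like the sketch below. It assumes a Llama Stack server running on localhost:8321 with the Llama 4 Scout model already registered; treat the exact method names as an approximation of the llama-stack-client API rather than a verbatim excerpt from this commit.

```python
# Sketch: Python SDK equivalent of the CLI chat completion above.
# Assumes a running Llama Stack server and a registered Llama 4 Scout model.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

response = client.inference.chat_completion(
    model_id="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[{"role": "user", "content": "Write a haiku about coding."}],
)
print(response.completion_message.content)
```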
@@ -106,46 +108,59 @@ By reducing friction and complexity, Llama Stack empowers developers to focus on

### API Providers
Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack.
+Please checkout for [full list](https://llama-stack.readthedocs.io/en/latest/providers/index.html)

-| **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** | **Post Training** |
+| API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Telemetry | Post Training | Eval | DatasetIO |
-|:------------------------:|:----------------------:|:----------:|:-------------:|:----------:|:----------:|:-------------:|:-----------------:|
+|:-------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:--------:|
-| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | |
+| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
-| SambaNova | Hosted | | ✅ | | ✅ | | |
+| SambaNova | Hosted | | ✅ | | ✅ | | | | |
-| Cerebras | Hosted | | ✅ | | | | |
+| Cerebras | Hosted | | ✅ | | | | | | |
-| Fireworks | Hosted | ✅ | ✅ | ✅ | | | |
+| Fireworks | Hosted | ✅ | ✅ | ✅ | | | | | |
-| AWS Bedrock | Hosted | | ✅ | | ✅ | | |
+| AWS Bedrock | Hosted | | ✅ | | ✅ | | | | |
-| Together | Hosted | ✅ | ✅ | | ✅ | | |
+| Together | Hosted | ✅ | ✅ | | ✅ | | | | |
-| Groq | Hosted | | ✅ | | | | |
+| Groq | Hosted | | ✅ | | | | | | |
-| Ollama | Single Node | | ✅ | | | | |
+| Ollama | Single Node | | ✅ | | | | | | |
-| TGI | Hosted and Single Node | | ✅ | | | | |
+| TGI | Hosted/Single Node | | ✅ | | | | | | |
-| NVIDIA NIM | Hosted and Single Node | | ✅ | | | | |
+| NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | | |
-| Chroma | Single Node | | | ✅ | | | |
+| ChromaDB | Hosted/Single Node | | | ✅ | | | | | |
-| PG Vector | Single Node | | | ✅ | | | |
+| PG Vector | Single Node | | | ✅ | | | | | |
-| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | |
+| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | | |
-| vLLM | Hosted and Single Node | | ✅ | | | | |
+| vLLM | Single Node | | ✅ | | | | | | |
-| OpenAI | Hosted | | ✅ | | | | |
+| OpenAI | Hosted | | ✅ | | | | | | |
-| Anthropic | Hosted | | ✅ | | | | |
+| Anthropic | Hosted | | ✅ | | | | | | |
-| Gemini | Hosted | | ✅ | | | | |
+| Gemini | Hosted | | ✅ | | | | | | |
-| watsonx | Hosted | | ✅ | | | | |
+| WatsonX | Hosted | | ✅ | | | | | | |
-| HuggingFace | Single Node | | | | | | ✅ |
+| HuggingFace | Single Node | | | | | | ✅ | | ✅ |
-| TorchTune | Single Node | | | | | | ✅ |
+| TorchTune | Single Node | | | | | | ✅ | | |
-| NVIDIA NEMO | Hosted | | | | | | ✅ |
+| NVIDIA NEMO | Hosted | | ✅ | ✅ | | | ✅ | ✅ | ✅ |
+| NVIDIA | Hosted | | | | | | ✅ | ✅ | ✅ |

+> **Note**: Additional providers are available through external packages. See [External Providers](https://llama-stack.readthedocs.io/en/latest/providers/external.html) documentation.

### Distributions

-A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (eg. ollama) and seamlessly transition to production (eg. Fireworks) without changing your application code. Here are some of the distributions we support:
+A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (eg. ollama) and seamlessly transition to production (eg. Fireworks) without changing your application code.
+
+Here are some of the distributions we support:

| **Distribution** | **Llama Stack Docker** | Start This Distribution |
|:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:|
| Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-gpu.html) |
-| SambaNova | [llamastack/distribution-sambanova](https://hub.docker.com/repository/docker/llamastack/distribution-sambanova/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/sambanova.html) |
+| TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/tgi.html)
-| Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) |
+| vLLM | [llamastack/distribution-remote-vllm](https://hub.docker.com/repository/docker/llamastack/distribution-remote-vllm/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/remote-vllm.html)
+| Starter | [llamastack/distribution-starter](https://hub.docker.com/repository/docker/llamastack/distribution-starter/general) | |
+| PostgreSQL | [llamastack/distribution-postgres-demo](https://hub.docker.com/repository/docker/llamastack/distribution-postgres-demo/general) | |
+
+Here are the ones out of support scope but still avaiable from Dockerhub:
+
+| **Distribution** | **Llama Stack Docker** | Start This Distribution |
+|:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:|
| Ollama | [llamastack/distribution-ollama](https://hub.docker.com/repository/docker/llamastack/distribution-ollama/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/ollama.html) |
-| TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/tgi.html) |
| Together | [llamastack/distribution-together](https://hub.docker.com/repository/docker/llamastack/distribution-together/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/together.html) |
| Fireworks | [llamastack/distribution-fireworks](https://hub.docker.com/repository/docker/llamastack/distribution-fireworks/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/fireworks.html) |
-| vLLM | [llamastack/distribution-remote-vllm](https://hub.docker.com/repository/docker/llamastack/distribution-remote-vllm/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/remote-vllm.html) |
+| AWS Bedrock | [llamastack/distribution-bedrock](https://hub.docker.com/repository/docker/llamastack/distribution-bedrock/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/bedrock.html) |
+| SambaNova | [llamastack/distribution-sambanova](https://hub.docker.com/repository/docker/llamastack/distribution-sambanova/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/sambanova.html) |
+| Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) | | | |

### Documentation
@@ -17,7 +17,9 @@
 "\n",
 "Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n",
 "\n",
-"In this guide, we will showcase how you can build LLM-powered agentic applications using Llama Stack.\n"
+"In this guide, we will showcase how you can build LLM-powered agentic applications using Llama Stack.\n",
+"\n",
+"**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. It provides a streamlined experience for getting up and running in just a few steps.\n"
 ]
 },
 {

@@ -17,7 +17,9 @@
 "\n",
 "Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n",
 "\n",
-"In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n"
+"In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n",
+"\n",
+"**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. It provides a streamlined experience for getting up and running in just a few steps.\n"
 ]
 },
 {

@@ -17,7 +17,9 @@
 "\n",
 "Read more about the project here: https://llama-stack.readthedocs.io/en/latest/index.html\n",
 "\n",
-"In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n"
+"In this guide, we will showcase how you can get started with using Llama 4 in Llama Stack.\n",
+"\n",
+"**💡 Quick Start Option:** If you want a simpler and faster way to test out Llama Stack, check out the [quick_start.ipynb](quick_start.ipynb) notebook instead. It provides a streamlined experience for getting up and running in just a few steps.\n"
 ]
 },
 {
docs/quick_start.ipynb (new file): 367 lines

@@ -0,0 +1,367 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "c1e7571c",
"metadata": {
"id": "c1e7571c"
},
"source": [
"[](https://colab.research.google.com/github/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)\n",
"\n",
"# Llama Stack - Building AI Applications\n",
"\n",
"<img src=\"https://llama-stack.readthedocs.io/en/latest/_images/llama-stack.png\" alt=\"drawing\" width=\"500\"/>\n",
"\n",
"Get started with Llama Stack in minutes!\n",
"\n",
"[Llama Stack](https://github.com/meta-llama/llama-stack) is a stateful service with REST APIs to support the seamless transition of AI applications across different environments. You can build and test using a local server first and deploy to a hosted endpoint for production.\n",
"\n",
"In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/)\n",
"as the inference [provider](docs/source/providers/index.md#inference) for a Llama Model.\n"
]
},
{
"cell_type": "markdown",
"id": "4CV1Q19BDMVw",
"metadata": {
"id": "4CV1Q19BDMVw"
},
"source": [
"## Step 1: Install and setup"
]
},
{
"cell_type": "markdown",
"id": "K4AvfUAJZOeS",
"metadata": {
"id": "K4AvfUAJZOeS"
},
"source": [
"### 1.1. Install uv and test inference with Ollama\n",
"\n",
"We'll install [uv](https://docs.astral.sh/uv/) to setup the Python virtual environment, along with [colab-xterm](https://github.com/InfuseAI/colab-xterm) for running command-line tools, and [Ollama](https://ollama.com/download) as the inference provider."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a2d7b85",
"metadata": {},
"outputs": [],
"source": [
"%pip install uv llama_stack llama-stack-client\n",
"\n",
"## If running on Collab:\n",
"# !pip install colab-xterm\n",
"# %load_ext colabxterm\n",
"\n",
"!curl https://ollama.ai/install.sh | sh"
]
},
{
"cell_type": "markdown",
"id": "39fa584b",
"metadata": {},
"source": [
"### 1.2. Test inference with Ollama"
]
},
{
"cell_type": "markdown",
"id": "3bf81522",
"metadata": {},
"source": [
"We’ll now launch a terminal and run inference on a Llama model with Ollama to verify that the model is working correctly."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a7e8e0f1",
"metadata": {},
"outputs": [],
"source": [
"## If running on Colab:\n",
"# %xterm\n",
"\n",
"## To be ran in the terminal:\n",
"# ollama serve &\n",
"# ollama run llama3.2:3b --keepalive 60m"
]
},
{
"cell_type": "markdown",
"id": "f3c5f243",
"metadata": {},
"source": [
"If successful, you should see the model respond to a prompt.\n",
"\n",
"...\n",
"```\n",
">>> hi\n",
"Hello! How can I assist you today?\n",
"```"
]
},
{
"cell_type": "markdown",
"id": "oDUB7M_qe-Gs",
"metadata": {
"id": "oDUB7M_qe-Gs"
},
"source": [
"## Step 2: Run the Llama Stack server\n",
"\n",
"In this showcase, we will start a Llama Stack server that is running locally."
]
},
{
"cell_type": "markdown",
"id": "732eadc6",
"metadata": {},
"source": [
"### 2.1. Setup the Llama Stack Server"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "J2kGed0R5PSf",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"collapsed": true,
"id": "J2kGed0R5PSf",
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
},
"outputs": [],
"source": [
"import os \n",
"import subprocess\n",
"\n",
"if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
"\n",
"# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
"!uv run --with llama-stack llama stack build --template ollama --image-type venv --image-name myvenv\n",
"\n",
"def run_llama_stack_server_background():\n",
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
" process = subprocess.Popen(\n",
" f\"uv run --with llama-stack llama stack run ollama --image-type venv --image-name myvenv --env INFERENCE_MODEL=llama3.2:3b\",\n",
" shell=True,\n",
" stdout=log_file,\n",
" stderr=log_file,\n",
" text=True\n",
" )\n",
" \n",
" print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
" return process\n",
"\n",
"def wait_for_server_to_start():\n",
" import requests\n",
" from requests.exceptions import ConnectionError\n",
" import time\n",
" \n",
" url = \"http://0.0.0.0:8321/v1/health\"\n",
" max_retries = 30\n",
" retry_interval = 1\n",
" \n",
" print(\"Waiting for server to start\", end=\"\")\n",
" for _ in range(max_retries):\n",
" try:\n",
" response = requests.get(url)\n",
" if response.status_code == 200:\n",
" print(\"\\nServer is ready!\")\n",
" return True\n",
" except ConnectionError:\n",
" print(\".\", end=\"\", flush=True)\n",
" time.sleep(retry_interval)\n",
" \n",
" print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
" return False\n",
"\n",
"\n",
"# use this helper if needed to kill the server \n",
"def kill_llama_stack_server():\n",
" # Kill any existing llama stack server processes\n",
" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
]
},
{
"cell_type": "markdown",
"id": "c40e9efd",
"metadata": {},
"source": [
"### 2.2. Start the Llama Stack Server"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "f779283d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting Llama Stack server with PID: 787100\n",
"Waiting for server to start\n",
"Server is ready!\n"
]
}
],
"source": [
"server_process = run_llama_stack_server_background()\n",
"assert wait_for_server_to_start()"
]
},
{
"cell_type": "markdown",
"id": "28477c03",
"metadata": {},
"source": [
"## Step 3: Run the demo"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "7da71011",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"rag_tool> Ingesting document: https://www.paulgraham.com/greatwork.html\n",
"prompt> How do you do great work?\n",
"\u001b[33minference> \u001b[0m\u001b[33m[k\u001b[0m\u001b[33mnowledge\u001b[0m\u001b[33m_search\u001b[0m\u001b[33m(query\u001b[0m\u001b[33m=\"\u001b[0m\u001b[33mWhat\u001b[0m\u001b[33m is\u001b[0m\u001b[33m the\u001b[0m\u001b[33m key\u001b[0m\u001b[33m to\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m\")]\u001b[0m\u001b[97m\u001b[0m\n",
"\u001b[32mtool_execution> Tool:knowledge_search Args:{'query': 'What is the key to doing great work'}\u001b[0m\n",
"\u001b[32mtool_execution> Tool:knowledge_search Response:[TextContentItem(text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n', type='text'), TextContentItem(text=\"Result 1:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 2:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 3:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 4:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 5:\\nDocument_id:docum\\nContent: work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text='END of knowledge_search tool results.\\n', type='text'), TextContentItem(text='The above results were retrieved to help answer the user\\'s query: \"What is the key to doing great work\". Use them as supporting information only in answering this query.\\n', type='text')]\u001b[0m\n",
"\u001b[33minference> \u001b[0m\u001b[33mDoing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m means\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m something\u001b[0m\u001b[33m important\u001b[0m\u001b[33m so\u001b[0m\u001b[33m well\u001b[0m\u001b[33m that\u001b[0m\u001b[33m you\u001b[0m\u001b[33m expand\u001b[0m\u001b[33m people\u001b[0m\u001b[33m's\u001b[0m\u001b[33m ideas\u001b[0m\u001b[33m of\u001b[0m\u001b[33m what\u001b[0m\u001b[33m's\u001b[0m\u001b[33m possible\u001b[0m\u001b[33m.\u001b[0m\u001b[33m However\u001b[0m\u001b[33m,\u001b[0m\u001b[33m there\u001b[0m\u001b[33m's\u001b[0m\u001b[33m no\u001b[0m\u001b[33m threshold\u001b[0m\u001b[33m for\u001b[0m\u001b[33m importance\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m's\u001b[0m\u001b[33m often\u001b[0m\u001b[33m hard\u001b[0m\u001b[33m to\u001b[0m\u001b[33m judge\u001b[0m\u001b[33m at\u001b[0m\u001b[33m the\u001b[0m\u001b[33m time\u001b[0m\u001b[33m anyway\u001b[0m\u001b[33m.\u001b[0m\u001b[33m Great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m matter\u001b[0m\u001b[33m of\u001b[0m\u001b[33m degree\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m can\u001b[0m\u001b[33m be\u001b[0m\u001b[33m difficult\u001b[0m\u001b[33m to\u001b[0m\u001b[33m determine\u001b[0m\u001b[33m whether\u001b[0m\u001b[33m someone\u001b[0m\u001b[33m has\u001b[0m\u001b[33m done\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m until\u001b[0m\u001b[33m after\u001b[0m\u001b[33m the\u001b[0m\u001b[33m fact\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n",
"\u001b[30m\u001b[0m"
]
}
],
"source": [
"from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient\n",
"\n",
"vector_db_id = \"my_demo_vector_db\"\n",
"client = LlamaStackClient(base_url=\"http://0.0.0.0:8321\")\n",
"\n",
"models = client.models.list()\n",
"\n",
"# Select the first LLM and first embedding models\n",
"model_id = next(m for m in models if m.model_type == \"llm\").identifier\n",
"embedding_model_id = (\n",
" em := next(m for m in models if m.model_type == \"embedding\")\n",
").identifier\n",
"embedding_dimension = em.metadata[\"embedding_dimension\"]\n",
"\n",
"_ = client.vector_dbs.register(\n",
" vector_db_id=vector_db_id,\n",
" embedding_model=embedding_model_id,\n",
" embedding_dimension=embedding_dimension,\n",
" provider_id=\"faiss\",\n",
")\n",
"source = \"https://www.paulgraham.com/greatwork.html\"\n",
"print(\"rag_tool> Ingesting document:\", source)\n",
"document = RAGDocument(\n",
" document_id=\"document_1\",\n",
" content=source,\n",
" mime_type=\"text/html\",\n",
" metadata={},\n",
")\n",
"client.tool_runtime.rag_tool.insert(\n",
" documents=[document],\n",
" vector_db_id=vector_db_id,\n",
" chunk_size_in_tokens=50,\n",
")\n",
"agent = Agent(\n",
" client,\n",
" model=model_id,\n",
" instructions=\"You are a helpful assistant\",\n",
" tools=[\n",
" {\n",
" \"name\": \"builtin::rag/knowledge_search\",\n",
" \"args\": {\"vector_db_ids\": [vector_db_id]},\n",
" }\n",
" ],\n",
")\n",
"\n",
"prompt = \"How do you do great work?\"\n",
"print(\"prompt>\", prompt)\n",
"\n",
"response = agent.create_turn(\n",
" messages=[{\"role\": \"user\", \"content\": prompt}],\n",
" session_id=agent.create_session(\"rag_session\"),\n",
" stream=True,\n",
")\n",
"\n",
"for log in AgentEventLogger().log(response):\n",
" log.print()"
]
},
{
"cell_type": "markdown",
"id": "341aaadf",
"metadata": {},
"source": [
"Congratulations! You've successfully built your first RAG application using Llama Stack! 🎉🥳"
]
},
{
"cell_type": "markdown",
"id": "e88e1185",
"metadata": {},
"source": [
"## Next Steps"
]
},
{
"cell_type": "markdown",
"id": "bcb73600",
"metadata": {},
"source": [
"Now you're ready to dive deeper into Llama Stack!\n",
"- Explore the [Detailed Tutorial](./detailed_tutorial.md).\n",
"- Try the [Getting Started Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb).\n",
"- Browse more [Notebooks on GitHub](https://github.com/meta-llama/llama-stack/tree/main/docs/notebooks).\n",
"- Learn about Llama Stack [Concepts](../concepts/index.md).\n",
"- Discover how to [Build Llama Stacks](../distributions/index.md).\n",
"- Refer to our [References](../references/index.md) for details on the Llama CLI and Python SDK.\n",
"- Check out the [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repository for example applications and tutorials."
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@@ -77,10 +77,10 @@ Next up is the most critical part: the set of providers that the stack will use
```yaml
providers:
  inference:
    # provider_id is a string you can choose freely
    - provider_id: ollama
      # provider_type is a string that specifies the type of provider.
-     # in this case, the provider for inference is ollama and it is run remotely (outside of the distribution)
+     # in this case, the provider for inference is ollama and it runs remotely (outside of the distribution)
      provider_type: remote::ollama
      # config is a dictionary that contains the configuration for the provider.
      # in this case, the configuration is the url of the ollama server

@@ -88,7 +88,7 @@ providers:
        url: ${env.OLLAMA_URL:=http://localhost:11434}
```
A few things to note:
-- A _provider instance_ is identified with an (id, type, configuration) triplet.
+- A _provider instance_ is identified with an (id, type, config) triplet.
- The id is a string you can choose freely.
- You can instantiate any number of provider instances of the same type.
- The configuration dictionary is provider-specific.

@@ -187,7 +187,7 @@ The environment variable substitution system is type-safe:

## Resources

-Finally, let's look at the `models` section:
+Let's look at the `models` section:

```yaml
models:

@@ -195,8 +195,9 @@ models:
  model_id: ${env.INFERENCE_MODEL}
  provider_id: ollama
  provider_model_id: null
+  model_type: llm
```
-A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage the clients to always register models before using them, some Stack servers may come up a list of "already known and available" models.
+A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage the clients to register models before using them, some Stack servers may come up a list of "already known and available" models.

What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`.
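To make the `model_id` vs `provider_model_id` distinction concrete, here is a hedged sketch of registering a model under a custom Stack-side name with the Python client. The alias `image_captioning_model` and the Ollama tag are illustrative, not taken from this commit, and the keyword arguments follow the llama-stack-client `models.register` call as commonly documented; they may differ slightly across client versions.

```python
# Sketch: registering "llama3.2:vision-11b" under a Stack-side alias.
# The alias and provider tag below are illustrative examples only.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

client.models.register(
    model_id="image_captioning_model",        # name used in your Stack interactions
    provider_id="ollama",                     # which provider instance serves it
    provider_model_id="llama3.2:vision-11b",  # name in the provider's catalog
    model_type="llm",
)
```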
@@ -8,6 +8,8 @@ environments. You can build and test using a local server first and deploy to a
In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/)
as the inference [provider](../providers/inference/index) for a Llama Model.

+**💡 Notebook Version:** You can also follow this quickstart guide in a Jupyter notebook format: [quick_start.ipynb](https://github.com/meta-llama/llama-stack/blob/main/docs/quick_start.ipynb)
+
#### Step 1: Install and setup
1. Install [uv](https://docs.astral.sh/uv/)
2. Run inference on a Llama model with [Ollama](https://ollama.com/download)
@@ -73,17 +73,26 @@ A number of "adapters" are available for some popular Inference and Vector Store
| OpenAI | Hosted |
| Anthropic | Hosted |
| Gemini | Hosted |
+| WatsonX | Hosted |
+
+**Agents API**
+| **Provider** | **Environments** |
+| :----: | :----: |
+| Meta Reference | Single Node |
+| Fireworks | Hosted |
+| Together | Hosted |
+| PyTorch ExecuTorch | On-device iOS |

**Vector IO API**
| **Provider** | **Environments** |
| :----: | :----: |
| FAISS | Single Node |
-| SQLite-Vec| Single Node |
+| SQLite-Vec | Single Node |
| Chroma | Hosted and Single Node |
| Milvus | Hosted and Single Node |
| Postgres (PGVector) | Hosted and Single Node |
| Weaviate | Hosted |
+| Qdrant | Hosted and Single Node |

**Safety API**
| **Provider** | **Environments** |

@@ -93,6 +102,30 @@ A number of "adapters" are available for some popular Inference and Vector Store
| Code Scanner | Single Node |
| AWS Bedrock | Hosted |
+
+**Post Training API**
+| **Provider** | **Environments** |
+| :----: | :----: |
+| Meta Reference | Single Node |
+| HuggingFace | Single Node |
+| TorchTune | Single Node |
+| NVIDIA NEMO | Hosted |
+
+**Eval API**
+| **Provider** | **Environments** |
+| :----: | :----: |
+| Meta Reference | Single Node |
+| NVIDIA NEMO | Hosted |
+
+**Telemetry API**
+| **Provider** | **Environments** |
+| :----: | :----: |
+| Meta Reference | Single Node |
+
+**Tool Runtime API**
+| **Provider** | **Environments** |
+| :----: | :----: |
+| Brave Search | Hosted |
+| RAG Runtime | Single Node |

```{toctree}
:hidden:
@@ -16,7 +16,6 @@ Meta's reference implementation of an agent system that can use tools, access ve
```yaml
persistence_store:
  type: sqlite
- namespace: null
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db
responses_store:
  type: sqlite

@@ -15,7 +15,6 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to
```yaml
kvstore:
  type: sqlite
- namespace: null
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db

```

@@ -15,7 +15,6 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi
```yaml
kvstore:
  type: sqlite
- namespace: null
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db

```

@@ -15,7 +15,6 @@ Meta's reference implementation of evaluation tasks with support for multiple la
```yaml
kvstore:
  type: sqlite
- namespace: null
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db

```

@@ -1,9 +1,10 @@
# Providers Overview

The goal of Llama Stack is to build an ecosystem where users can easily swap out different implementations for the same API. Examples for these include:
-- LLM inference providers (e.g., Ollama, Fireworks, Together, AWS Bedrock, Groq, Cerebras, SambaNova, vLLM, etc.),
+- LLM inference providers (e.g., Meta Reference, Ollama, Fireworks, Together, AWS Bedrock, Groq, Cerebras, SambaNova, vLLM, OpenAI, Anthropic, Gemini, WatsonX, etc.),
-- Vector databases (e.g., ChromaDB, Weaviate, Qdrant, Milvus, FAISS, PGVector, SQLite-Vec, etc.),
+- Vector databases (e.g., FAISS, SQLite-Vec, ChromaDB, Weaviate, Qdrant, Milvus, PGVector, etc.),
-- Safety providers (e.g., Meta's Llama Guard, AWS Bedrock Guardrails, etc.)
+- Safety providers (e.g., Meta's Llama Guard, Prompt Guard, Code Scanner, AWS Bedrock Guardrails, etc.),
+- Tool Runtime providers (e.g., RAG Runtime, Brave Search, etc.)

Providers come in two flavors:
- **Remote**: the provider runs as a separate service external to the Llama Stack codebase. Llama Stack contains a small amount of adapter code.

@@ -44,7 +44,6 @@ more details about Faiss in general.
```yaml
kvstore:
  type: sqlite
- namespace: null
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db

```

@@ -15,7 +15,6 @@ Meta's reference implementation of a vector database.
```yaml
kvstore:
  type: sqlite
- namespace: null
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db

```

@@ -19,7 +19,6 @@ Please refer to the remote provider documentation.
  db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy}/milvus.db
kvstore:
  type: sqlite
- namespace: null
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_registry.db

```
@@ -45,7 +45,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 1,
+"execution_count": 14,
 "metadata": {},
 "outputs": [],
 "source": [

@@ -65,7 +65,7 @@
 "from dotenv import load_dotenv\n",
 "\n",
 "load_dotenv()\n",
-"BRAVE_SEARCH_API_KEY = os.environ[\"BRAVE_SEARCH_API_KEY\"]\n"
+"TAVILY_SEARCH_API_KEY = os.environ[\"TAVILY_SEARCH_API_KEY\"]\n"
 ]
 },
 {

@@ -110,10 +110,17 @@
 "from llama_stack_client import LlamaStackClient\n",
 "from llama_stack_client.lib.agents.agent import Agent\n",
 "from llama_stack_client.lib.agents.event_logger import EventLogger\n",
+"from llama_stack_client.types import UserMessage\n",
+"from typing import cast, Iterator\n",
 "\n",
 "\n",
 "async def agent_example():\n",
-" client = LlamaStackClient(base_url=f\"http://{HOST}:{PORT}\")\n",
+" client = LlamaStackClient(\n",
+" base_url=f\"http://{HOST}:{PORT}\",\n",
+" provider_data={\n",
+" \"tavily_search_api_key\": TAVILY_SEARCH_API_KEY,\n",
+" }\n",
+" )\n",
 " agent = Agent(\n",
 " client,\n",
 " model=MODEL_NAME,\n",

@@ -123,13 +130,7 @@
 " \"type\": \"greedy\",\n",
 " },\n",
 " },\n",
-" tools=[\n",
+" tools=[\"builtin::websearch\"],\n",
-" {\n",
-" \"type\": \"brave_search\",\n",
-" \"engine\": \"brave\",\n",
-" \"api_key\": BRAVE_SEARCH_API_KEY,\n",
-" }\n",
-" ],\n",
 " )\n",
 " session_id = agent.create_session(\"test-session\")\n",
 " print(f\"Created session_id={session_id} for Agent({agent.agent_id})\")\n",

@@ -142,15 +143,13 @@
 " for prompt in user_prompts:\n",
 " response = agent.create_turn(\n",
 " messages=[\n",
-" {\n",
+" UserMessage(role=\"user\", content=prompt)\n",
-" \"role\": \"user\",\n",
-" \"content\": prompt,\n",
-" }\n",
 " ],\n",
 " session_id=session_id,\n",
+" stream=True,\n",
 " )\n",
 "\n",
-" async for log in EventLogger().log(response):\n",
+" for log in EventLogger().log(cast(Iterator, response)):\n",
 " log.print()\n",
 "\n",
 "\n",
@@ -101,7 +101,7 @@ class MetricInResponse(BaseModel):

 # This is a short term solution to allow inference API to return metrics
 # The ideal way to do this is to have a way for all response types to include metrics
-# and all metric events logged to the telemetry API to be inlcuded with the response
+# and all metric events logged to the telemetry API to be included with the response
 # To do this, we will need to augment all response types with a metrics field.
 # We have hit a blocker from stainless SDK that prevents us from doing this.
 # The blocker is that if we were to augment the response types that have a data field
@@ -106,4 +106,26 @@ def is_action_allowed(


 class AccessDeniedError(RuntimeError):
-    pass
+    def __init__(self, action: str | None = None, resource: ProtectedResource | None = None, user: User | None = None):
+        self.action = action
+        self.resource = resource
+        self.user = user
+
+        message = _build_access_denied_message(action, resource, user)
+        super().__init__(message)
+
+
+def _build_access_denied_message(action: str | None, resource: ProtectedResource | None, user: User | None) -> str:
+    """Build detailed error message for access denied scenarios."""
+    if action and resource and user:
+        resource_info = f"{resource.type}::{resource.identifier}"
+        user_info = f"'{user.principal}'"
+        if user.attributes:
+            attrs = ", ".join([f"{k}={v}" for k, v in user.attributes.items()])
+            user_info += f" (attributes: {attrs})"
+
+        message = f"User {user_info} cannot perform action '{action}' on resource '{resource_info}'"
+    else:
+        message = "Insufficient permissions"
+
+    return message
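A quick sketch of what the richer error message buys callers. The stand-in `Resource` and `RequestUser` classes below are hypothetical placeholders for the ProtectedResource and User types referenced in the hunk; only the message format mirrors the change above.

```python
# Sketch: the message AccessDeniedError now carries, using hypothetical stand-ins
# for the ProtectedResource and User protocols referenced in the diff.
from dataclasses import dataclass, field


@dataclass
class Resource:          # stand-in for ProtectedResource
    type: str
    identifier: str


@dataclass
class RequestUser:       # stand-in for User
    principal: str
    attributes: dict = field(default_factory=dict)


def build_message(action, resource, user):
    # Mirrors _build_access_denied_message from the hunk above.
    if action and resource and user:
        user_info = f"'{user.principal}'"
        if user.attributes:
            attrs = ", ".join(f"{k}={v}" for k, v in user.attributes.items())
            user_info += f" (attributes: {attrs})"
        return f"User {user_info} cannot perform action '{action}' on resource '{resource.type}::{resource.identifier}'"
    return "Insufficient permissions"


print(build_message("delete", Resource("model", "image_captioning_model"), RequestUser("alice", {"team": "ml"})))
# -> User 'alice' (attributes: team=ml) cannot perform action 'delete' on resource 'model::image_captioning_model'
```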
@@ -17,6 +17,7 @@ from llama_stack.distribution.distribution import (
     builtin_automatically_routed_apis,
     get_provider_registry,
 )
+from llama_stack.distribution.stack import replace_env_vars
 from llama_stack.distribution.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
 from llama_stack.distribution.utils.dynamic import instantiate_class_type
 from llama_stack.distribution.utils.prompt_for_config import prompt_for_config

@@ -163,7 +164,7 @@ def upgrade_from_routing_table(
 def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig:
     version = config_dict.get("version", None)
     if version == LLAMA_STACK_RUN_CONFIG_VERSION:
-        return StackRunConfig(**config_dict)
+        return StackRunConfig(**replace_env_vars(config_dict))

     if "routing_table" in config_dict:
         logger.info("Upgrading config...")

@@ -174,4 +175,4 @@ def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfi
     if not config_dict.get("external_providers_dir", None):
         config_dict["external_providers_dir"] = EXTERNAL_PROVIDERS_DIR

-    return StackRunConfig(**config_dict)
+    return StackRunConfig(**replace_env_vars(config_dict))
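The effect of routing the parsed config through `replace_env_vars` is that `${env.VAR:=default}` placeholders, like the `OLLAMA_URL` example in the configuration docs above, get resolved before Pydantic validation. The helper below is a simplified approximation written for illustration; the real `replace_env_vars` in llama_stack supports more placeholder forms and type-aware coercion.

```python
# Simplified approximation of ${env.VAR:=default} substitution, for illustration only.
# The real replace_env_vars handles more syntax (e.g. required variables) as well.
import os
import re

_PATTERN = re.compile(r"\$\{env\.(?P<name>\w+):=(?P<default>[^}]*)\}")


def substitute_env(value):
    # Walk nested dicts/lists and substitute placeholders inside strings.
    if isinstance(value, dict):
        return {k: substitute_env(v) for k, v in value.items()}
    if isinstance(value, list):
        return [substitute_env(v) for v in value]
    if isinstance(value, str):
        return _PATTERN.sub(lambda m: os.environ.get(m["name"], m["default"]), value)
    return value


config = {"providers": {"inference": [{"provider_id": "ollama",
                                       "config": {"url": "${env.OLLAMA_URL:=http://localhost:11434}"}}]}}
print(substitute_env(config)["providers"]["inference"][0]["config"]["url"])
# -> the OLLAMA_URL env var if set, otherwise http://localhost:11434
```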
@@ -175,8 +175,9 @@ class CommonRoutingTableImpl(RoutingTable):
         return obj

     async def unregister_object(self, obj: RoutableObjectWithProvider) -> None:
-        if not is_action_allowed(self.policy, "delete", obj, get_authenticated_user()):
-            raise AccessDeniedError()
+        user = get_authenticated_user()
+        if not is_action_allowed(self.policy, "delete", obj, user):
+            raise AccessDeniedError("delete", obj, user)
         await self.dist_registry.delete(obj.type, obj.identifier)
         await unregister_object_from_provider(obj, self.impls_by_provider_id[obj.provider_id])

@@ -193,7 +194,7 @@ class CommonRoutingTableImpl(RoutingTable):
         # If object supports access control but no attributes set, use creator's attributes
         creator = get_authenticated_user()
         if not is_action_allowed(self.policy, "create", obj, creator):
-            raise AccessDeniedError()
+            raise AccessDeniedError("create", obj, creator)
         if creator:
             obj.owner = creator
             logger.info(f"Setting owner for {obj.type} '{obj.identifier}' to {obj.owner.principal}")
@@ -9,6 +9,7 @@ import asyncio
 import functools
 import inspect
 import json
+import logging
 import os
 import ssl
 import sys
@@ -31,6 +32,7 @@ from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
 
 from llama_stack.apis.common.responses import PaginatedResponse
+from llama_stack.distribution.access_control.access_control import AccessDeniedError
 from llama_stack.distribution.datatypes import AuthenticationRequiredError, LoggingConfig, StackRunConfig
 from llama_stack.distribution.distribution import builtin_automatically_routed_apis
 from llama_stack.distribution.request_headers import PROVIDER_DATA_VAR, User, request_provider_data_context
@@ -116,7 +118,7 @@ def translate_exception(exc: Exception) -> HTTPException | RequestValidationErro
         return HTTPException(status_code=400, detail=f"Invalid value: {str(exc)}")
     elif isinstance(exc, BadRequestError):
         return HTTPException(status_code=400, detail=str(exc))
-    elif isinstance(exc, PermissionError):
+    elif isinstance(exc, PermissionError | AccessDeniedError):
         return HTTPException(status_code=403, detail=f"Permission denied: {str(exc)}")
     elif isinstance(exc, asyncio.TimeoutError | TimeoutError):
         return HTTPException(status_code=504, detail=f"Operation timed out: {str(exc)}")
@@ -236,7 +238,10 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
                result.url = route
            return result
        except Exception as e:
-            logger.exception(f"Error executing endpoint {route=} {method=}")
+            if logger.isEnabledFor(logging.DEBUG):
+                logger.exception(f"Error executing endpoint {route=} {method=}")
+            else:
+                logger.error(f"Error executing endpoint {route=} {method=}: {str(e)}")
            raise translate_exception(e) from e
 
     sig = inspect.signature(func)
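The last hunk gates full tracebacks behind DEBUG level: logger.exception() only when debugging, a single-line logger.error() otherwise, before translate_exception() maps the error to an HTTP status. A minimal, self-contained sketch of that pattern (the names here are illustrative, not taken from the server module):

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("sketch")

    def handle(route: str, method: str) -> None:
        try:
            raise ValueError("boom")  # stand-in for the endpoint call failing
        except Exception as e:
            if logger.isEnabledFor(logging.DEBUG):
                logger.exception(f"Error executing endpoint {route=} {method=}")
            else:
                logger.error(f"Error executing endpoint {route=} {method=}: {str(e)}")

    handle("/v1/health", "GET")  # at INFO level, prints one line without a traceback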
@@ -10,11 +10,11 @@ from typing import Protocol
 
 import pydantic
 
-from llama_stack.distribution.datatypes import KVStoreConfig, RoutableObjectWithProvider
+from llama_stack.distribution.datatypes import RoutableObjectWithProvider
 from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
-from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
+from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
 
 logger = get_logger(__name__, category="core")
 
@@ -53,7 +53,7 @@ class AgentPersistence:
             identifier=name,  # should this be qualified in any way?
         )
         if not is_action_allowed(self.policy, "create", session_info, user):
-            raise AccessDeniedError()
+            raise AccessDeniedError("create", session_info, user)
 
         await self.kvstore.set(
             key=f"session:{self.agent_id}:{session_id}",
@@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]:
                 "pillow",
                 "pandas",
                 "scikit-learn",
+                "mcp",
             ]
             + kvstore_dependencies(),  # TODO make this dynamic based on the kvstore config
             module="llama_stack.providers.inline.agents.meta_reference",
@@ -36,15 +36,14 @@ class RedisKVStoreConfig(CommonConfig):
     def url(self) -> str:
         return f"redis://{self.host}:{self.port}"
 
-    @property
-    def pip_packages(self) -> list[str]:
+    @classmethod
+    def pip_packages(cls) -> list[str]:
         return ["redis"]
 
     @classmethod
     def sample_run_config(cls):
         return {
             "type": "redis",
-            "namespace": None,
             "host": "${env.REDIS_HOST:=localhost}",
             "port": "${env.REDIS_PORT:=6379}",
         }
@@ -57,15 +56,14 @@ class SqliteKVStoreConfig(CommonConfig):
         description="File path for the sqlite database",
     )
 
-    @property
-    def pip_packages(self) -> list[str]:
+    @classmethod
+    def pip_packages(cls) -> list[str]:
         return ["aiosqlite"]
 
     @classmethod
     def sample_run_config(cls, __distro_dir__: str, db_name: str = "kvstore.db"):
         return {
             "type": "sqlite",
-            "namespace": None,
             "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
         }
 
@@ -73,7 +71,7 @@ class SqliteKVStoreConfig(CommonConfig):
 class PostgresKVStoreConfig(CommonConfig):
     type: Literal[KVStoreType.postgres.value] = KVStoreType.postgres.value
     host: str = "localhost"
-    port: str = "5432"
+    port: int = 5432
     db: str = "llamastack"
     user: str
     password: str | None = None
@@ -83,7 +81,6 @@ class PostgresKVStoreConfig(CommonConfig):
     def sample_run_config(cls, table_name: str = "llamastack_kvstore", **kwargs):
         return {
             "type": "postgres",
-            "namespace": None,
             "host": "${env.POSTGRES_HOST:=localhost}",
             "port": "${env.POSTGRES_PORT:=5432}",
             "db": "${env.POSTGRES_DB:=llamastack}",
@@ -108,8 +105,8 @@ class PostgresKVStoreConfig(CommonConfig):
             raise ValueError("Table name must be less than 63 characters")
         return v
 
-    @property
-    def pip_packages(self) -> list[str]:
+    @classmethod
+    def pip_packages(cls) -> list[str]:
         return ["psycopg2-binary"]
 
 
@@ -122,15 +119,14 @@ class MongoDBKVStoreConfig(CommonConfig):
     password: str | None = None
     collection_name: str = "llamastack_kvstore"
 
-    @property
-    def pip_packages(self) -> list[str]:
+    @classmethod
+    def pip_packages(cls) -> list[str]:
         return ["pymongo"]
 
     @classmethod
     def sample_run_config(cls, collection_name: str = "llamastack_kvstore"):
         return {
             "type": "mongodb",
-            "namespace": None,
             "host": "${env.MONGODB_HOST:=localhost}",
             "port": "${env.MONGODB_PORT:=5432}",
             "db": "${env.MONGODB_DB}",
@@ -144,3 +140,21 @@ KVStoreConfig = Annotated[
     RedisKVStoreConfig | SqliteKVStoreConfig | PostgresKVStoreConfig | MongoDBKVStoreConfig,
     Field(discriminator="type", default=KVStoreType.sqlite.value),
 ]
+
+
+def get_pip_packages(store_config: dict | KVStoreConfig) -> list[str]:
+    """Get pip packages for KV store config, handling both dict and object cases."""
+    if isinstance(store_config, dict):
+        store_type = store_config.get("type")
+        if store_type == "sqlite":
+            return SqliteKVStoreConfig.pip_packages()
+        elif store_type == "postgres":
+            return PostgresKVStoreConfig.pip_packages()
+        elif store_type == "redis":
+            return RedisKVStoreConfig.pip_packages()
+        elif store_type == "mongodb":
+            return MongoDBKVStoreConfig.pip_packages()
+        else:
+            raise ValueError(f"Unknown KV store type: {store_type}")
+    else:
+        return store_config.pip_packages()
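Usage sketch for the get_pip_packages() helper added above: because pip_packages is now a classmethod, the helper can resolve dependencies from either a typed config object or the plain dict that a run.yaml kvstore block parses to. The dict values below are made-up examples.

    from llama_stack.providers.utils.kvstore.config import get_pip_packages

    # A run.yaml kvstore block arrives as a plain dict before validation.
    print(get_pip_packages({"type": "sqlite", "db_path": "/tmp/kv.db"}))  # -> ['aiosqlite']
    print(get_pip_packages({"type": "postgres", "db": "llamastack"}))     # -> ['psycopg2-binary']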
@@ -30,8 +30,8 @@ class SqlAlchemySqlStoreConfig(BaseModel):
     def engine_str(self) -> str: ...
 
     # TODO: move this when we have a better way to specify dependencies with internal APIs
-    @property
-    def pip_packages(self) -> list[str]:
+    @classmethod
+    def pip_packages(cls) -> list[str]:
         return ["sqlalchemy[asyncio]"]
 
 
@@ -48,20 +48,20 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
 
     @classmethod
     def sample_run_config(cls, __distro_dir__: str, db_name: str = "sqlstore.db"):
-        return cls(
-            type="sqlite",
-            db_path="${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
-        )
+        return {
+            "type": "sqlite",
+            "db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
+        }
 
-    @property
-    def pip_packages(self) -> list[str]:
-        return super().pip_packages + ["aiosqlite"]
+    @classmethod
+    def pip_packages(cls) -> list[str]:
+        return super().pip_packages() + ["aiosqlite"]
 
 
 class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
     type: Literal["postgres"] = SqlStoreType.postgres.value
     host: str = "localhost"
-    port: str = "5432"
+    port: int = 5432
     db: str = "llamastack"
     user: str
     password: str | None = None
@@ -70,20 +70,20 @@ class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig):
     def engine_str(self) -> str:
         return f"postgresql+asyncpg://{self.user}:{self.password}@{self.host}:{self.port}/{self.db}"
 
-    @property
-    def pip_packages(self) -> list[str]:
-        return super().pip_packages + ["asyncpg"]
+    @classmethod
+    def pip_packages(cls) -> list[str]:
+        return super().pip_packages() + ["asyncpg"]
 
     @classmethod
     def sample_run_config(cls, **kwargs):
-        return cls(
-            type="postgres",
-            host="${env.POSTGRES_HOST:=localhost}",
-            port="${env.POSTGRES_PORT:=5432}",
-            db="${env.POSTGRES_DB:=llamastack}",
-            user="${env.POSTGRES_USER:=llamastack}",
-            password="${env.POSTGRES_PASSWORD:=llamastack}",
-        )
+        return {
+            "type": "postgres",
+            "host": "${env.POSTGRES_HOST:=localhost}",
+            "port": "${env.POSTGRES_PORT:=5432}",
+            "db": "${env.POSTGRES_DB:=llamastack}",
+            "user": "${env.POSTGRES_USER:=llamastack}",
+            "password": "${env.POSTGRES_PASSWORD:=llamastack}",
+        }
 
 
 SqlStoreConfig = Annotated[
@@ -92,6 +92,20 @@ SqlStoreConfig = Annotated[
 ]
 
 
+def get_pip_packages(store_config: dict | SqlStoreConfig) -> list[str]:
+    """Get pip packages for SQL store config, handling both dict and object cases."""
+    if isinstance(store_config, dict):
+        store_type = store_config.get("type")
+        if store_type == "sqlite":
+            return SqliteSqlStoreConfig.pip_packages()
+        elif store_type == "postgres":
+            return PostgresSqlStoreConfig.pip_packages()
+        else:
+            raise ValueError(f"Unknown SQL store type: {store_type}")
+    else:
+        return store_config.pip_packages()
+
+
 def sqlstore_impl(config: SqlStoreConfig) -> SqlStore:
     if config.type in [SqlStoreType.sqlite.value, SqlStoreType.postgres.value]:
         from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl
@@ -9,6 +9,11 @@ import uuid
 
 
 def generate_chunk_id(document_id: str, chunk_text: str) -> str:
-    """Generate a unique chunk ID using a hash of document ID and chunk text."""
+    """
+    Generate a unique chunk ID using a hash of the document ID and chunk text.
+
+    Note: MD5 is used only to calculate an identifier, not for security purposes.
+    Adding usedforsecurity=False for compatibility with FIPS environments.
+    """
     hash_input = f"{document_id}:{chunk_text}".encode()
-    return str(uuid.UUID(hashlib.md5(hash_input).hexdigest()))
+    return str(uuid.UUID(hashlib.md5(hash_input, usedforsecurity=False).hexdigest()))
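The usedforsecurity=False flag only affects FIPS compatibility; the chunk ID stays a deterministic UUID derived from the MD5 of "document_id:chunk_text". A standalone restatement of the function for a quick check (the example inputs are arbitrary):

    import hashlib
    import uuid

    def generate_chunk_id(document_id: str, chunk_text: str) -> str:
        hash_input = f"{document_id}:{chunk_text}".encode()
        return str(uuid.UUID(hashlib.md5(hash_input, usedforsecurity=False).hexdigest()))

    # Same inputs always map to the same chunk ID.
    assert generate_chunk_id("doc-1", "hello") == generate_chunk_id("doc-1", "hello")
    print(generate_chunk_id("doc-1", "hello"))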
@@ -21,7 +21,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/faiss_store.db
   safety:
   - provider_id: bedrock
@@ -33,7 +32,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/agents_store.db
       responses_store:
         type: sqlite
@@ -51,7 +49,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -59,14 +56,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -31,7 +31,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/faiss_store.db
   agents:
   - provider_id: meta-reference
@@ -39,7 +38,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/agents_store.db
       responses_store:
         type: sqlite
@@ -50,7 +48,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -58,14 +55,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -36,7 +36,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db
       responses_store:
         type: sqlite
@@ -54,7 +53,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -62,14 +60,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -39,7 +39,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db
       responses_store:
         type: sqlite
@@ -57,7 +56,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -65,14 +63,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -35,7 +35,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db
       responses_store:
         type: sqlite
@@ -53,7 +52,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -61,14 +59,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -27,7 +27,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -45,7 +44,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db
       responses_store:
         type: sqlite
@@ -63,7 +61,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -71,14 +68,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -27,7 +27,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -40,7 +39,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db
       responses_store:
         type: sqlite
@@ -58,7 +56,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -66,14 +63,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -26,7 +26,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -39,7 +38,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/agents_store.db
       responses_store:
         type: sqlite
@@ -57,7 +55,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -65,14 +62,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -31,7 +31,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -44,7 +43,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db
       responses_store:
         type: sqlite
@@ -62,7 +60,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -70,14 +67,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -26,7 +26,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -39,7 +38,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db
       responses_store:
         type: sqlite
@@ -57,7 +55,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -65,14 +62,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -31,7 +31,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -44,7 +43,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db
       responses_store:
         type: sqlite
@@ -62,7 +60,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -70,14 +67,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -26,7 +26,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -39,7 +38,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db
       responses_store:
         type: sqlite
@@ -57,7 +55,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -65,14 +62,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -48,7 +48,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/agents_store.db
       responses_store:
         type: sqlite
@@ -66,7 +65,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -74,14 +72,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -41,7 +41,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -54,7 +53,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db
       responses_store:
         type: sqlite
@@ -72,7 +70,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -80,14 +77,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -31,7 +31,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -44,7 +43,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/agents_store.db
       responses_store:
         type: sqlite
@@ -62,7 +60,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -70,14 +67,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -30,7 +30,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db
   safety:
   - provider_id: nvidia
@@ -44,7 +43,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db
       responses_store:
         type: sqlite
@@ -75,7 +73,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/localfs_datasetio.db
   - provider_id: nvidia
     provider_type: remote::nvidia
@@ -25,7 +25,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/faiss_store.db
   safety:
   - provider_id: nvidia
@@ -39,7 +38,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/agents_store.db
       responses_store:
         type: sqlite
@@ -25,7 +25,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -40,7 +39,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
       responses_store:
         type: sqlite
@@ -58,7 +56,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -66,14 +63,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -25,7 +25,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -38,7 +37,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db
       responses_store:
         type: sqlite
@@ -56,7 +54,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -64,14 +61,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -62,7 +62,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/agents_store.db
       responses_store:
         type: sqlite
@@ -80,7 +79,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -88,14 +86,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -26,7 +26,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -44,7 +43,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db
       responses_store:
         type: sqlite
@@ -62,7 +60,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -70,14 +67,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -26,7 +26,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -39,7 +38,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db
       responses_store:
         type: sqlite
@@ -57,7 +55,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -65,14 +62,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db
   scoring:
   - provider_id: basic
@@ -114,7 +114,7 @@ def get_distribution_template() -> DistributionTemplate:
                 provider_id="meta-reference",
                 provider_type="inline::meta-reference",
                 config=dict(
-                    service_name="${env.OTEL_SERVICE_NAME:=}",
+                    service_name="${env.OTEL_SERVICE_NAME:=\u200b}",
                     sinks="${env.TELEMETRY_SINKS:=console,otel_trace}",
                     otel_trace_endpoint="${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}",
                 ),

@@ -51,7 +51,7 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      service_name: ${env.OTEL_SERVICE_NAME:=}
+      service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
       sinks: ${env.TELEMETRY_SINKS:=console,otel_trace}
       otel_trace_endpoint: ${env.OTEL_TRACE_ENDPOINT:=http://localhost:4318/v1/traces}
   tool_runtime:

@@ -35,7 +35,6 @@ providers:
     config:
      kvstore:
        type: sqlite
-       namespace: null
       db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -48,7 +47,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db
       responses_store:
         type: sqlite
@@ -59,7 +57,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -67,14 +64,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db
   scoring:
   - provider_id: basic

@@ -28,7 +28,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -41,7 +40,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db
       responses_store:
         type: sqlite
@@ -52,7 +50,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -60,14 +57,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db
   scoring:
   - provider_id: basic

@@ -23,7 +23,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/faiss_store.db
   - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
     provider_type: remote::chromadb
@@ -49,7 +48,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/agents_store.db
       responses_store:
         type: sqlite

@@ -66,7 +66,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/faiss_store.db
   - provider_id: ${env.ENABLE_SQLITE_VEC:+sqlite-vec}
     provider_type: inline::sqlite-vec
@@ -78,7 +77,6 @@ providers:
       db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/milvus_registry.db
   - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
     provider_type: remote::chromadb
@@ -111,7 +109,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/agents_store.db
       responses_store:
         type: sqlite
@@ -129,7 +126,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -137,14 +133,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/localfs_datasetio.db
   scoring:
   - provider_id: basic

@@ -234,7 +234,6 @@ def get_distribution_template() -> DistributionTemplate:

     default_models = get_model_registry(available_models)

-    postgres_store = PostgresSqlStoreConfig.sample_run_config()
     return DistributionTemplate(
         name=name,
         distro_type="self_hosted",
@@ -243,7 +242,7 @@ def get_distribution_template() -> DistributionTemplate:
         template_path=None,
         providers=providers,
         available_models_by_provider=available_models,
-        additional_pip_packages=postgres_store.pip_packages,
+        additional_pip_packages=PostgresSqlStoreConfig.pip_packages(),
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={

@@ -15,6 +15,7 @@ from pydantic import BaseModel, Field
 from llama_stack.apis.datasets import DatasetPurpose
 from llama_stack.apis.models import ModelType
 from llama_stack.distribution.datatypes import (
+    LLAMA_STACK_RUN_CONFIG_VERSION,
     Api,
     BenchmarkInput,
     BuildConfig,
@@ -23,14 +24,15 @@ from llama_stack.distribution.datatypes import (
     ModelInput,
     Provider,
     ShieldInput,
-    StackRunConfig,
     ToolGroupInput,
 )
 from llama_stack.distribution.distribution import get_provider_registry
 from llama_stack.distribution.utils.dynamic import instantiate_class_type
 from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
-from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
-from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig
+from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
+from llama_stack.providers.utils.kvstore.config import get_pip_packages as get_kv_pip_packages
+from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
+from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages


 def get_model_registry(
@@ -87,21 +89,24 @@ class RunConfigSettings(BaseModel):
     default_tool_groups: list[ToolGroupInput] | None = None
     default_datasets: list[DatasetInput] | None = None
     default_benchmarks: list[BenchmarkInput] | None = None
-    metadata_store: KVStoreConfig | None = None
-    inference_store: SqlStoreConfig | None = None
+    metadata_store: dict | None = None
+    inference_store: dict | None = None

     def run_config(
         self,
         name: str,
         providers: dict[str, list[str]],
         container_image: str | None = None,
-    ) -> StackRunConfig:
+    ) -> dict:
         provider_registry = get_provider_registry()

         provider_configs = {}
         for api_str, provider_types in providers.items():
             if api_providers := self.provider_overrides.get(api_str):
-                provider_configs[api_str] = api_providers
+                # Convert Provider objects to dicts for YAML serialization
+                provider_configs[api_str] = [
+                    p.model_dump(exclude_none=True) if isinstance(p, Provider) else p for p in api_providers
+                ]
                 continue

             provider_configs[api_str] = []
@@ -128,33 +133,40 @@ class RunConfigSettings(BaseModel):
                         provider_id=provider_id,
                         provider_type=provider_type,
                         config=config,
-                    )
+                    ).model_dump(exclude_none=True)
                 )

         # Get unique set of APIs from providers
         apis = sorted(providers.keys())

-        return StackRunConfig(
-            image_name=name,
-            container_image=container_image,
-            apis=apis,
-            providers=provider_configs,
-            metadata_store=self.metadata_store
+        # Return a dict that matches StackRunConfig structure
+        return {
+            "version": LLAMA_STACK_RUN_CONFIG_VERSION,
+            "image_name": name,
+            "container_image": container_image,
+            "apis": apis,
+            "providers": provider_configs,
+            "metadata_store": self.metadata_store
             or SqliteKVStoreConfig.sample_run_config(
                 __distro_dir__=f"~/.llama/distributions/{name}",
                 db_name="registry.db",
             ),
-            inference_store=self.inference_store
+            "inference_store": self.inference_store
             or SqliteSqlStoreConfig.sample_run_config(
                 __distro_dir__=f"~/.llama/distributions/{name}",
                 db_name="inference_store.db",
             ),
-            models=self.default_models or [],
-            shields=self.default_shields or [],
-            tool_groups=self.default_tool_groups or [],
-            datasets=self.default_datasets or [],
-            benchmarks=self.default_benchmarks or [],
-        )
+            "models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])],
+            "shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])],
+            "vector_dbs": [],
+            "datasets": [d.model_dump(exclude_none=True) for d in (self.default_datasets or [])],
+            "scoring_fns": [],
+            "benchmarks": [b.model_dump(exclude_none=True) for b in (self.default_benchmarks or [])],
+            "tool_groups": [t.model_dump(exclude_none=True) for t in (self.default_tool_groups or [])],
+            "server": {
+                "port": 8321,
+            },
+        }


 class DistributionTemplate(BaseModel):
@@ -190,10 +202,12 @@ class DistributionTemplate(BaseModel):
             # TODO: This is a hack to get the dependencies for internal APIs into build
             # We should have a better way to do this by formalizing the concept of "internal" APIs
             # and providers, with a way to specify dependencies for them.
-            if run_config_.inference_store:
-                additional_pip_packages.extend(run_config_.inference_store.pip_packages)
-            if run_config_.metadata_store:
-                additional_pip_packages.extend(run_config_.metadata_store.pip_packages)
+            if run_config_.get("inference_store"):
+                additional_pip_packages.extend(get_sql_pip_packages(run_config_["inference_store"]))
+
+            if run_config_.get("metadata_store"):
+                additional_pip_packages.extend(get_kv_pip_packages(run_config_["metadata_store"]))

         if self.additional_pip_packages:
             additional_pip_packages.extend(self.additional_pip_packages)
@@ -286,7 +300,7 @@ class DistributionTemplate(BaseModel):
             run_config = settings.run_config(self.name, self.providers, self.container_image)
             with open(yaml_output_dir / yaml_pth, "w") as f:
                 yaml.safe_dump(
-                    run_config.model_dump(exclude_none=True),
+                    {k: v for k, v in run_config.items() if v is not None},
                     f,
                     sort_keys=False,
                 )

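To make the reworked `run_config()` above concrete, here is a minimal sketch of how the plain dict it now returns can be dumped to YAML the same way `DistributionTemplate` does. This is not part of the change itself: the import path and the `inference=remote::ollama` provider mapping are assumptions used only for illustration.

```python
# Hedged sketch: assumes RunConfigSettings is importable from the templates
# module of an installed llama-stack, which also supplies the provider
# registry that run_config() consults internally.
import yaml

from llama_stack.templates.template import RunConfigSettings

settings = RunConfigSettings()
run_config = settings.run_config(
    name="example-distro",  # hypothetical distribution name
    providers={"inference": ["remote::ollama"]},  # hypothetical provider mapping
    container_image=None,
)

# run_config is now a plain dict, so None values are filtered before dumping,
# mirroring the yaml.safe_dump call in the DistributionTemplate hunk above.
print(yaml.safe_dump({k: v for k, v in run_config.items() if v is not None}, sort_keys=False))
```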
@@ -26,7 +26,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -39,7 +38,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db
       responses_store:
         type: sqlite
@@ -57,7 +55,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -65,14 +62,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db
   scoring:
   - provider_id: basic

@@ -25,7 +25,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -38,7 +37,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db
       responses_store:
         type: sqlite
@@ -56,7 +54,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -64,14 +61,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db
   scoring:
   - provider_id: basic

@@ -26,7 +26,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -44,7 +43,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db
       responses_store:
         type: sqlite
@@ -62,7 +60,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -70,14 +67,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db
   scoring:
   - provider_id: basic

@@ -26,7 +26,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -39,7 +38,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db
       responses_store:
         type: sqlite
@@ -57,7 +55,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -65,14 +62,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db
   scoring:
   - provider_id: basic

@@ -30,7 +30,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -43,7 +42,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/agents_store.db
       responses_store:
         type: sqlite
@@ -61,7 +59,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -69,14 +66,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/localfs_datasetio.db
   scoring:
   - provider_id: basic

@@ -27,7 +27,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db
   safety:
   - provider_id: llama-guard
@@ -40,7 +39,6 @@ providers:
     config:
       persistence_store:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db
       responses_store:
         type: sqlite
@@ -58,7 +56,6 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db
   datasetio:
   - provider_id: huggingface
@@ -66,14 +63,12 @@ providers:
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db
   - provider_id: localfs
     provider_type: inline::localfs
     config:
       kvstore:
         type: sqlite
-        namespace: null
         db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db
   scoring:
   - provider_id: basic

@@ -9,7 +9,9 @@ pytest --help
 ```

 Here are the most important options:
-- `--stack-config`: specify the stack config to use. You have three ways to point to a stack:
+- `--stack-config`: specify the stack config to use. You have four ways to point to a stack:
+  - **`server:<config>`** - automatically start a server with the given config (e.g., `server:fireworks`). This provides one-step testing by auto-starting the server if the port is available, or reusing an existing server if already running.
+  - **`server:<config>:<port>`** - same as above but with a custom port (e.g., `server:together:8322`)
   - a URL which points to a Llama Stack distribution server
   - a template (e.g., `fireworks`, `together`) or a path to a `run.yaml` file
   - a comma-separated list of api=provider pairs, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`. This is most useful for testing a single API surface.
@@ -26,12 +28,39 @@ Model parameters can be influenced by the following options:
 Each of these are comma-separated lists and can be used to generate multiple parameter combinations. Note that tests will be skipped
 if no model is specified.

-Experimental, under development, options:
-- `--record-responses`: record new API responses instead of using cached ones
-
-
 ## Examples

+### Testing against a Server
+
+Run all text inference tests by auto-starting a server with the `fireworks` config:
+
+```bash
+pytest -s -v tests/integration/inference/test_text_inference.py \
+   --stack-config=server:fireworks \
+   --text-model=meta-llama/Llama-3.1-8B-Instruct
+```
+
+Run tests with auto-server startup on a custom port:
+
+```bash
+pytest -s -v tests/integration/inference/ \
+   --stack-config=server:together:8322 \
+   --text-model=meta-llama/Llama-3.1-8B-Instruct
+```
+
+Run multiple test suites with auto-server (eliminates manual server management):
+
+```bash
+# Auto-start server and run all integration tests
+export FIREWORKS_API_KEY=<your_key>
+
+pytest -s -v tests/integration/inference/ tests/integration/safety/ tests/integration/agents/ \
+   --stack-config=server:fireworks \
+   --text-model=meta-llama/Llama-3.1-8B-Instruct
+```
+
+### Testing with Library Client
+
 Run all text inference tests with the `together` distribution:

 ```bash

@@ -6,9 +6,13 @@

 import inspect
 import os
+import socket
+import subprocess
 import tempfile
+import time

 import pytest
+import requests
 import yaml
 from llama_stack_client import LlamaStackClient
 from openai import OpenAI
@@ -17,6 +21,60 @@ from llama_stack import LlamaStackAsLibraryClient
 from llama_stack.distribution.stack import run_config_from_adhoc_config_spec
 from llama_stack.env import get_env_or_fail

+DEFAULT_PORT = 8321
+
+
+def is_port_available(port: int, host: str = "localhost") -> bool:
+    """Check if a port is available for binding."""
+    try:
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+            sock.bind((host, port))
+            return True
+    except OSError:
+        return False
+
+
+def start_llama_stack_server(config_name: str) -> subprocess.Popen:
+    """Start a llama stack server with the given config."""
+    cmd = ["llama", "stack", "run", config_name]
+    devnull = open(os.devnull, "w")
+    process = subprocess.Popen(
+        cmd,
+        stdout=devnull,  # redirect stdout to devnull to prevent deadlock
+        stderr=devnull,  # redirect stderr to devnull to prevent deadlock
+        text=True,
+        env={**os.environ, "LLAMA_STACK_LOG_FILE": "server.log"},
+    )
+    return process
+
+
+def wait_for_server_ready(base_url: str, timeout: int = 30, process: subprocess.Popen | None = None) -> bool:
+    """Wait for the server to be ready by polling the health endpoint."""
+    health_url = f"{base_url}/v1/health"
+    start_time = time.time()
+
+    while time.time() - start_time < timeout:
+        if process and process.poll() is not None:
+            print(f"Server process terminated with return code: {process.returncode}")
+            return False
+
+        try:
+            response = requests.get(health_url, timeout=5)
+            if response.status_code == 200:
+                return True
+        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
+            pass
+
+        # Print progress every 5 seconds
+        elapsed = time.time() - start_time
+        if int(elapsed) % 5 == 0 and elapsed > 0:
+            print(f"Waiting for server at {base_url}... ({elapsed:.1f}s elapsed)")
+
+        time.sleep(0.5)
+
+    print(f"Server failed to respond within {timeout} seconds")
+    return False
+
+
 @pytest.fixture(scope="session")
 def provider_data():
@@ -122,6 +180,41 @@ def llama_stack_client(request, provider_data):
     if not config:
         raise ValueError("You must specify either --stack-config or LLAMA_STACK_CONFIG")

+    # Handle server:<config_name> format or server:<config_name>:<port>
+    if config.startswith("server:"):
+        parts = config.split(":")
+        config_name = parts[1]
+        port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
+        base_url = f"http://localhost:{port}"
+
+        # Check if port is available
+        if is_port_available(port):
+            print(f"Starting llama stack server with config '{config_name}' on port {port}...")
+
+            # Start server
+            server_process = start_llama_stack_server(config_name)
+
+            # Wait for server to be ready
+            if not wait_for_server_ready(base_url, timeout=30, process=server_process):
+                print("Server failed to start within timeout")
+                server_process.terminate()
+                raise RuntimeError(
+                    f"Server failed to start within timeout. Check that config '{config_name}' exists and is valid. "
+                    f"See server.log for details."
+                )
+
+            print(f"Server is ready at {base_url}")
+
+            # Store process for potential cleanup (pytest will handle termination at session end)
+            request.session._llama_stack_server_process = server_process
+        else:
+            print(f"Port {port} is already in use, assuming server is already running...")
+
+        return LlamaStackClient(
+            base_url=base_url,
+            provider_data=provider_data,
+        )
+
     # check if this looks like a URL
     if config.startswith("http") or "//" in config:
         return LlamaStackClient(
@@ -151,3 +244,31 @@ def llama_stack_client(request, provider_data):
 def openai_client(client_with_models):
     base_url = f"{client_with_models.base_url}/v1/openai/v1"
     return OpenAI(base_url=base_url, api_key="fake")
+
+
+@pytest.fixture(scope="session", autouse=True)
+def cleanup_server_process(request):
+    """Cleanup server process at the end of the test session."""
+    yield  # Run tests
+
+    if hasattr(request.session, "_llama_stack_server_process"):
+        server_process = request.session._llama_stack_server_process
+        if server_process:
+            if server_process.poll() is None:
+                print("Terminating llama stack server process...")
+            else:
+                print(f"Server process already terminated with return code: {server_process.returncode}")
+                return
+            try:
+                server_process.terminate()
+                server_process.wait(timeout=10)
+                print("Server process terminated gracefully")
+            except subprocess.TimeoutExpired:
+                print("Server process did not terminate gracefully, killing it")
+                server_process.kill()
+                server_process.wait()
+                print("Server process killed")
+            except Exception as e:
+                print(f"Error during server cleanup: {e}")
+        else:
+            print("Server process not found - won't be able to cleanup")

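As a rough sketch of how a test consumes the auto-start behaviour added above: invoking pytest with `--stack-config=server:<config>` makes the `llama_stack_client` fixture boot a server (or reuse one already listening) and hand back a client pointed at it. The `models.list()` call is an assumption about the client API, used only to show the fixture in action; the file path in the comment is hypothetical.

```python
# Hypothetical test module, e.g. tests/integration/example/test_server_smoke.py
def test_server_is_reachable(llama_stack_client):
    # The fixture returns a LlamaStackClient bound to http://localhost:<port>,
    # starting the server first when the port is free.
    models = llama_stack_client.models.list()
    assert models is not None
```

Run with, for example, `pytest -s -v tests/integration/example --stack-config=server:fireworks`.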
@@ -4,14 +4,14 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-import pytest
+import pytest_asyncio

 from llama_stack.distribution.store.registry import CachedDiskDistributionRegistry, DiskDistributionRegistry
 from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
 from llama_stack.providers.utils.kvstore.sqlite import SqliteKVStoreImpl


-@pytest.fixture(scope="function")
+@pytest_asyncio.fixture(scope="function")
 async def sqlite_kvstore(tmp_path):
     db_path = tmp_path / "test_kv.db"
     kvstore_config = SqliteKVStoreConfig(db_path=db_path.as_posix())
@@ -20,14 +20,14 @@ async def sqlite_kvstore(tmp_path):
     yield kvstore


-@pytest.fixture(scope="function")
+@pytest_asyncio.fixture(scope="function")
 async def disk_dist_registry(sqlite_kvstore):
     registry = DiskDistributionRegistry(sqlite_kvstore)
     await registry.initialize()
     yield registry


-@pytest.fixture(scope="function")
+@pytest_asyncio.fixture(scope="function")
 async def cached_disk_dist_registry(sqlite_kvstore):
     registry = CachedDiskDistributionRegistry(sqlite_kvstore)
     await registry.initialize()

@@ -9,6 +9,7 @@ from datetime import datetime
 from unittest.mock import patch

 import pytest
+import pytest_asyncio

 from llama_stack.apis.agents import Turn
 from llama_stack.apis.inference import CompletionMessage, StopReason
@@ -16,7 +17,7 @@ from llama_stack.distribution.datatypes import User
 from llama_stack.providers.inline.agents.meta_reference.persistence import AgentPersistence, AgentSessionInfo


-@pytest.fixture
+@pytest_asyncio.fixture
 async def test_setup(sqlite_kvstore):
     agent_persistence = AgentPersistence(agent_id="test_agent", kvstore=sqlite_kvstore, policy={})
     yield agent_persistence

@@ -148,7 +148,7 @@ async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks, embedding_dime
     assert len(chunk_ids) == len(set(chunk_ids)), "Duplicate chunk IDs detected across batches!"


-@pytest.fixture(scope="session")
+@pytest_asyncio.fixture(scope="session")
 async def sqlite_vec_adapter(sqlite_connection):
     config = type("Config", (object,), {"db_path": ":memory:"})  # Mock config with in-memory database
     adapter = SQLiteVecVectorIOAdapter(config=config, inference_api=None)

@@ -7,6 +7,7 @@
 from unittest.mock import MagicMock, Mock, patch

 import pytest
+import pytest_asyncio
 import yaml
 from pydantic import TypeAdapter, ValidationError

@@ -26,7 +27,7 @@ def _return_model(model):
     return model


-@pytest.fixture
+@pytest_asyncio.fixture
 async def test_setup(cached_disk_dist_registry):
     mock_inference = Mock()
     mock_inference.__provider_spec__ = MagicMock()
@@ -245,7 +246,7 @@ async def test_automatic_access_attributes(mock_get_authenticated_user, test_set
     assert model.identifier == "auto-access-model"


-@pytest.fixture
+@pytest_asyncio.fixture
 async def test_setup_with_access_policy(cached_disk_dist_registry):
     mock_inference = Mock()
     mock_inference.__provider_spec__ = MagicMock()

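The fixture changes above all follow the same pattern: async generator fixtures are decorated with `@pytest_asyncio.fixture` so the plugin runs their setup and teardown on the event loop. A minimal, self-contained sketch of that pattern follows; the `AsyncResource` class is hypothetical and not taken from this change.

```python
import pytest
import pytest_asyncio


class AsyncResource:
    """Toy stand-in for the kvstore/registry objects used by the real fixtures."""

    async def initialize(self) -> None:
        self.ready = True

    async def close(self) -> None:
        self.ready = False


@pytest_asyncio.fixture
async def resource():
    r = AsyncResource()
    await r.initialize()
    yield r          # hand the initialized resource to the test
    await r.close()  # teardown runs after the test finishes


@pytest.mark.asyncio
async def test_resource_is_ready(resource):
    assert resource.ready
```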
187
tests/unit/server/test_server.py
Normal file
@@ -0,0 +1,187 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from unittest.mock import Mock
+
+from fastapi import HTTPException
+from openai import BadRequestError
+from pydantic import ValidationError
+
+from llama_stack.distribution.access_control.access_control import AccessDeniedError
+from llama_stack.distribution.datatypes import AuthenticationRequiredError
+from llama_stack.distribution.server.server import translate_exception
+
+
+class TestTranslateException:
+    """Test cases for the translate_exception function."""
+
+    def test_translate_access_denied_error(self):
+        """Test that AccessDeniedError is translated to 403 HTTP status."""
+        exc = AccessDeniedError()
+        result = translate_exception(exc)
+
+        assert isinstance(result, HTTPException)
+        assert result.status_code == 403
+        assert result.detail == "Permission denied: Insufficient permissions"
+
+    def test_translate_access_denied_error_with_context(self):
+        """Test that AccessDeniedError with context includes detailed information."""
+        from llama_stack.distribution.datatypes import User
+
+        # Create mock user and resource
+        user = User("test-user", {"roles": ["user"], "teams": ["dev"]})
+
+        # Create a simple mock object that implements the ProtectedResource protocol
+        class MockResource:
+            def __init__(self, type: str, identifier: str, owner=None):
+                self.type = type
+                self.identifier = identifier
+                self.owner = owner
+
+        resource = MockResource("vector_db", "test-db")
+
+        exc = AccessDeniedError("create", resource, user)
+        result = translate_exception(exc)
+
+        assert isinstance(result, HTTPException)
+        assert result.status_code == 403
+        assert "test-user" in result.detail
+        assert "vector_db::test-db" in result.detail
+        assert "create" in result.detail
+        assert "roles=['user']" in result.detail
+        assert "teams=['dev']" in result.detail
+
+    def test_translate_permission_error(self):
+        """Test that PermissionError is translated to 403 HTTP status."""
+        exc = PermissionError("Permission denied")
+        result = translate_exception(exc)
+
+        assert isinstance(result, HTTPException)
+        assert result.status_code == 403
+        assert result.detail == "Permission denied: Permission denied"
+
+    def test_translate_value_error(self):
+        """Test that ValueError is translated to 400 HTTP status."""
+        exc = ValueError("Invalid input")
+        result = translate_exception(exc)
+
+        assert isinstance(result, HTTPException)
+        assert result.status_code == 400
+        assert result.detail == "Invalid value: Invalid input"
+
+    def test_translate_bad_request_error(self):
+        """Test that BadRequestError is translated to 400 HTTP status."""
+        # Create a mock response for BadRequestError
+        mock_response = Mock()
+        mock_response.status_code = 400
+        mock_response.headers = {}
+
+        exc = BadRequestError("Bad request", response=mock_response, body="Bad request")
+        result = translate_exception(exc)
+
+        assert isinstance(result, HTTPException)
+        assert result.status_code == 400
+        assert result.detail == "Bad request"
+
+    def test_translate_authentication_required_error(self):
+        """Test that AuthenticationRequiredError is translated to 401 HTTP status."""
+        exc = AuthenticationRequiredError("Authentication required")
+        result = translate_exception(exc)
+
+        assert isinstance(result, HTTPException)
+        assert result.status_code == 401
+        assert result.detail == "Authentication required: Authentication required"
+
+    def test_translate_timeout_error(self):
+        """Test that TimeoutError is translated to 504 HTTP status."""
+        exc = TimeoutError("Operation timed out")
+        result = translate_exception(exc)
+
+        assert isinstance(result, HTTPException)
+        assert result.status_code == 504
+        assert result.detail == "Operation timed out: Operation timed out"
+
+    def test_translate_asyncio_timeout_error(self):
+        """Test that asyncio.TimeoutError is translated to 504 HTTP status."""
+        exc = TimeoutError()
+        result = translate_exception(exc)
+
+        assert isinstance(result, HTTPException)
+        assert result.status_code == 504
+        assert result.detail == "Operation timed out: "
+
+    def test_translate_not_implemented_error(self):
+        """Test that NotImplementedError is translated to 501 HTTP status."""
+        exc = NotImplementedError("Not implemented")
+        result = translate_exception(exc)
+
+        assert isinstance(result, HTTPException)
+        assert result.status_code == 501
+        assert result.detail == "Not implemented: Not implemented"
+
+    def test_translate_validation_error(self):
+        """Test that ValidationError is translated to 400 HTTP status with proper format."""
+        # Create a mock validation error using proper Pydantic error format
+        exc = ValidationError.from_exception_data(
+            "TestModel",
+            [
+                {
+                    "loc": ("field", "nested"),
+                    "msg": "field required",
+                    "type": "missing",
+                }
+            ],
+        )
+
+        result = translate_exception(exc)
+
+        assert isinstance(result, HTTPException)
+        assert result.status_code == 400
+        assert "errors" in result.detail
+        assert len(result.detail["errors"]) == 1
+        assert result.detail["errors"][0]["loc"] == ["field", "nested"]
+        assert result.detail["errors"][0]["msg"] == "Field required"
+        assert result.detail["errors"][0]["type"] == "missing"
+
+    def test_translate_generic_exception(self):
+        """Test that generic exceptions are translated to 500 HTTP status."""
+        exc = Exception("Unexpected error")
+        result = translate_exception(exc)
+
+        assert isinstance(result, HTTPException)
+        assert result.status_code == 500
+        assert result.detail == "Internal server error: An unexpected error occurred."
+
+    def test_translate_runtime_error(self):
+        """Test that RuntimeError is translated to 500 HTTP status."""
+        exc = RuntimeError("Runtime error")
+        result = translate_exception(exc)
+
+        assert isinstance(result, HTTPException)
+        assert result.status_code == 500
+        assert result.detail == "Internal server error: An unexpected error occurred."
+
+    def test_multiple_access_denied_scenarios(self):
+        """Test various scenarios that should result in 403 status codes."""
+        # Test AccessDeniedError (uses enhanced message)
+        exc1 = AccessDeniedError()
+        result1 = translate_exception(exc1)
+        assert isinstance(result1, HTTPException)
+        assert result1.status_code == 403
+        assert result1.detail == "Permission denied: Insufficient permissions"
+
+        # Test PermissionError (uses generic message)
+        exc2 = PermissionError("No permission")
+        result2 = translate_exception(exc2)
+        assert isinstance(result2, HTTPException)
+        assert result2.status_code == 403
+        assert result2.detail == "Permission denied: No permission"
+
+        exc3 = PermissionError("Access denied")
+        result3 = translate_exception(exc3)
+        assert isinstance(result3, HTTPException)
+        assert result3.status_code == 403
+        assert result3.detail == "Permission denied: Access denied"