Merge branch 'main' into fiddlecube-guard

Commit 42d6e7e4a1 by Kaushik Srinivasan, 2025-02-10 18:14:45 -08:00 (committed via GitHub)
69 changed files with 721 additions and 367 deletions

@@ -23,3 +23,7 @@ jobs:
 .pre-commit-config.yaml
 - uses: pre-commit/action@v3.0.1
+- name: Verify if there are any diff files after pre-commit
+  run: |
+    git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)

@@ -54,7 +54,7 @@ jobs:
 echo "REPORT_FILE=${REPORT_OUTPUT}" >> "$GITHUB_ENV"
 export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
-LLAMA_STACK_CONFIG=./llama_stack/templates/${{ matrix.provider }}/run.yaml pytest --md-report --md-report-verbose=1 ./tests/client-sdk/inference/test_inference.py --md-report-output "$REPORT_OUTPUT"
+LLAMA_STACK_CONFIG=./llama_stack/templates/${{ matrix.provider }}/run.yaml pytest --md-report --md-report-verbose=1 ./tests/client-sdk/inference/ --md-report-output "$REPORT_OUTPUT"
 - name: Output reports to the job summary
 if: always()

@@ -48,6 +48,7 @@ repos:
 hooks:
 - id: uv-export
 args: ["--frozen", "--no-hashes", "--no-emit-project"]
+- id: uv-sync
 # - repo: https://github.com/pre-commit/mirrors-mypy
 #   rev: v1.14.0

@@ -1,44 +0,0 @@
# Changelog
## 0.2.0
### Added
### Changed
### Removed
## 0.0.53
### Added
- Resource-oriented design for models, shields, memory banks, datasets and eval tasks
- Persistence for registered objects with distribution
- Ability to persist memory banks created for FAISS
- PostgreSQL KVStore implementation
- Environment variable placeholder support in run.yaml files
- Comprehensive Zero-to-Hero notebooks and quickstart guides
- Support for quantized models in Ollama
- Vision models support for Together, Fireworks, Meta-Reference, and Ollama, and vLLM
- Bedrock distribution with safety shields support
- Evals API with task registration and scoring functions
- MMLU and SimpleQA benchmark scoring functions
- Huggingface dataset provider integration for benchmarks
- Support for custom dataset registration from local paths
- Benchmark evaluation CLI tools with visualization tables
- RAG evaluation scoring functions and metrics
- Local persistence for datasets and eval tasks
### Changed
- Split safety into distinct providers (llama-guard, prompt-guard, code-scanner)
- Changed provider naming convention (`impls` → `inline`, `adapters` → `remote`)
- Updated API signatures for dataset and eval task registration
- Restructured folder organization for providers
- Enhanced Docker build configuration
- Added version prefixing for REST API routes
- Enhanced evaluation task registration workflow
- Improved benchmark evaluation output formatting
- Restructured evals folder organization for better modularity
### Removed
- `llama stack configure` command

@@ -34,22 +34,22 @@ By reducing friction and complexity, Llama Stack empowers developers to focus on
 ### API Providers
 Here is a list of the various API providers and available distributions to developers started easily,
 | **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** |
-|:------------------------------------------------------------------------------------------:|:----------------------:|:------------------:|:------------------:|:------------------:|:------------------:|:------------------:|
-| Meta Reference | Single Node | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
-| SambaNova | Hosted | | :heavy_check_mark: | | | |
-| Cerebras | Hosted | | :heavy_check_mark: | | | |
-| Fireworks | Hosted | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | |
-| AWS Bedrock | Hosted | | :heavy_check_mark: | | :heavy_check_mark: | |
-| Together | Hosted | :heavy_check_mark: | :heavy_check_mark: | | :heavy_check_mark: | |
-| Groq | Hosted | | :heavy_check_mark: | | | |
-| Ollama | Single Node | | :heavy_check_mark: | | | |
-| TGI | Hosted and Single Node | | :heavy_check_mark: | | | |
-| NVIDIA NIM | Hosted and Single Node | | :heavy_check_mark: | | | |
-| Chroma | Single Node | | | :heavy_check_mark: | | |
-| PG Vector | Single Node | | | :heavy_check_mark: | | |
-| PyTorch ExecuTorch | On-device iOS | :heavy_check_mark: | :heavy_check_mark: | | | |
-| vLLM | Hosted and Single Node | | :heavy_check_mark: | | | |
+|:------------------------:|:----------------------:|:----------:|:-------------:|:----------:|:----------:|:-------------:|
+| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ |
+| SambaNova | Hosted | | ✅ | | | |
+| Cerebras | Hosted | | ✅ | | | |
+| Fireworks | Hosted | ✅ | ✅ | ✅ | | |
+| AWS Bedrock | Hosted | | ✅ | | ✅ | |
+| Together | Hosted | ✅ | ✅ | | ✅ | |
+| Groq | Hosted | | ✅ | | | |
+| Ollama | Single Node | | ✅ | | | |
+| TGI | Hosted and Single Node | | ✅ | | | |
+| NVIDIA NIM | Hosted and Single Node | | ✅ | | | |
+| Chroma | Single Node | | | ✅ | | |
+| PG Vector | Single Node | | | ✅ | | |
+| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | |
+| vLLM | Hosted and Single Node | | ✅ | | | |
 ### Distributions

@@ -69,6 +69,40 @@
"fiddlecube": [
"httpx"
],
"dell": [
"aiohttp",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"fireworks": [ "fireworks": [
"aiosqlite", "aiosqlite",
"autoevals", "autoevals",
@ -255,6 +289,38 @@
"sentence-transformers --no-deps", "sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu" "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
], ],
"nvidia": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"ollama": [ "ollama": [
"aiohttp", "aiohttp",
"aiosqlite", "aiosqlite",
@ -322,6 +388,36 @@
"sentence-transformers --no-deps", "sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu" "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
], ],
"sambanova": [
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"tgi": [ "tgi": [
"aiohttp", "aiohttp",
"aiosqlite", "aiosqlite",
@ -424,101 +520,5 @@
"vllm", "vllm",
"sentence-transformers --no-deps", "sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu" "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"nvidia": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"sambanova": [
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"dell": [
"aiohttp",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
]
}

docs/conftest.py (new file, +9 lines)

@@ -0,0 +1,9 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
def pytest_collection_modifyitems(items):
for item in items:
item.name = item.name.replace(' ', '_')

@@ -86,7 +86,6 @@
 "# NBVAL_SKIP\n",
 "\n",
 "!apt-get install -y bubblewrap\n",
-"# install a branch of llama stack\n",
 "import os\n",
 "os.environ[\"UV_SYSTEM_PYTHON\"] = \"1\"\n",
 "!pip install uv\n",
@@ -3397,6 +3396,231 @@
 "response = client.scoring.score(input_rows=rows, scoring_functions=scoring_params)\n",
 "pprint(response)\n"
 ]
},
{
"cell_type": "markdown",
"id": "ad077440",
"metadata": {},
"source": [
"## 4. Image Understanding with Llama 3.2\n",
"\n",
"Below is a complete example of using Together's Llama Stack 0.1 server at https://llama-stack.together.ai to ask Llama 3.2 questions about an image."
]
},
{
"cell_type": "markdown",
"id": "82e381ec",
"metadata": {},
"source": [
"### 4.1 Setup and helpers\n",
"\n",
"Below we install the Llama Stack client 0.1, download the example image, define two image helpers, and set Llama Stack Together server URL and Llama 3.2 model name.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "865fc5a8",
"metadata": {},
"outputs": [],
"source": [
"!pip install llama-stack-client==0.1.0"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "44e05e16",
"metadata": {},
"outputs": [],
"source": [
"!wget https://raw.githubusercontent.com/meta-llama/llama-models/refs/heads/main/Llama_Repo.jpeg"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "469750f7",
"metadata": {},
"outputs": [],
"source": [
"from PIL import Image\n",
"import matplotlib.pyplot as plt\n",
"\n",
"def display_image(path):\n",
" img = Image.open(path)\n",
" plt.imshow(img)\n",
" plt.axis('off')\n",
" plt.show()\n",
"\n",
"display_image(\"Llama_Repo.jpeg\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a2c1e1c2",
"metadata": {},
"outputs": [],
"source": [
"import base64\n",
"\n",
"def encode_image(image_path):\n",
" with open(image_path, \"rb\") as image_file:\n",
" base64_string = base64.b64encode(image_file.read()).decode(\"utf-8\")\n",
" base64_url = f\"data:image/png;base64,{base64_string}\"\n",
" return base64_url"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c565f99e",
"metadata": {},
"outputs": [],
"source": [
"from llama_stack_client import LlamaStackClient\n",
"\n",
"LLAMA_STACK_API_TOGETHER_URL=\"https://llama-stack.together.ai\"\n",
"LLAMA32_11B_INSTRUCT = \"meta-llama/Llama-3.2-11B-Vision-Instruct\""
]
},
{
"cell_type": "markdown",
"id": "7737cd41",
"metadata": {},
"source": [
"### 4.2 Using Llama Stack Chat API\n",
"\n",
"The code below uses the Llama Stack 0.1's chat API to interact with Llama 3.2:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d7914894",
"metadata": {},
"outputs": [],
"source": [
"from llama_stack_client.lib.inference.event_logger import EventLogger\n",
"\n",
"async def run_main(image_path: str, prompt):\n",
" client = LlamaStackClient(\n",
" base_url=LLAMA_STACK_API_TOGETHER_URL,\n",
" )\n",
"\n",
" message = {\n",
" \"role\": \"user\",\n",
" \"content\": [\n",
" {\n",
" \"type\": \"image\",\n",
" \"image\": {\n",
" \"url\": {\n",
" \"uri\": encode_image(image_path)\n",
" }\n",
" }\n",
" },\n",
" {\n",
" \"type\": \"text\",\n",
" \"text\": prompt,\n",
" }\n",
" ]\n",
" }\n",
"\n",
" response = client.inference.chat_completion(\n",
" messages=[message],\n",
" model_id=LLAMA32_11B_INSTRUCT,\n",
" stream=False,\n",
" )\n",
"\n",
" print(response.completion_message.content.lower().strip())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ee09b97",
"metadata": {},
"outputs": [],
"source": [
"await run_main(\"Llama_Repo.jpeg\",\n",
" \"How many different colors are those llamas?\\\n",
" What are those colors?\")"
]
},
{
"cell_type": "markdown",
"id": "e741d7b9",
"metadata": {},
"source": [
"### 4.3 Using Llama Stack Agent API\n",
"\n",
"The code below uses the Llama Stack 0.1's Agent API to interact with Llama 3.2:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f9a83275",
"metadata": {},
"outputs": [],
"source": [
"from llama_stack_client.lib.agents.agent import Agent\n",
"from llama_stack_client.lib.agents.event_logger import EventLogger\n",
"from llama_stack_client.types.agent_create_params import AgentConfig\n",
"\n",
"async def run_main(image_path, prompt):\n",
" base64_image = encode_image(image_path)\n",
"\n",
" client = LlamaStackClient(\n",
" base_url=LLAMA_STACK_API_TOGETHER_URL,\n",
" )\n",
"\n",
" agent_config = AgentConfig(\n",
" model=LLAMA32_11B_INSTRUCT,\n",
" instructions=\"You are a helpful assistant\",\n",
" enable_session_persistence=False,\n",
" )\n",
"\n",
" agent = Agent(client, agent_config)\n",
" session_id = agent.create_session(\"test-session\")\n",
"\n",
" response = agent.create_turn(\n",
" messages=[{\n",
" \"role\": \"user\",\n",
" \"content\": [\n",
" {\n",
" \"type\": \"image\",\n",
" \"image\": {\n",
" \"url\": {\n",
" \"uri\": encode_image(image_path)\n",
" }\n",
" }\n",
" },\n",
" {\n",
" \"type\": \"text\",\n",
" \"text\": prompt,\n",
" }\n",
" ]\n",
" }],\n",
" session_id=session_id,\n",
" )\n",
"\n",
" for log in EventLogger().log(response):\n",
" log.print()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15d0098b",
"metadata": {},
"outputs": [],
"source": [
"await run_main(\"Llama_Repo.jpeg\",\n",
" \"How many different colors are those llamas?\\\n",
" What are those colors?\")"
]
}
],
"metadata": {

@@ -4,7 +4,7 @@ Llama Stack provides all the building blocks needed to create sophisticated AI a
 The best way to get started is to look at this notebook which walks through the various APIs (from basic inference, to RAG agents) and how to use them.
-**Notebook**: [Building AI Applications](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb)
+**Notebook**: [Building AI Applications](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)
 Here are some key topics that will help you build effective agents:

@@ -36,13 +36,12 @@ chunks = [
 "content": "Your document text here",
 "mime_type": "text/plain",
 },
-    ...,
 ]
-client.vector_io.insert(vector_db_id, chunks)
+client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks)
 # You can then query for these chunks
 chunks_response = client.vector_io.query(
-    vector_db_id, query="What do you know about..."
+    vector_db_id=vector_db_id, query="What do you know about..."
 )
 ```
@@ -72,8 +71,8 @@ client.tool_runtime.rag_tool.insert(
 # Query documents
 results = client.tool_runtime.rag_tool.query(
-    vector_db_id=vector_db_id,
-    query="What do you know about...",
+    vector_db_ids=[vector_db_id],
+    content="What do you know about...",
 )
 ```
@@ -82,10 +81,14 @@ results = client.tool_runtime.rag_tool.query(
 One of the most powerful patterns is combining agents with RAG capabilities. Here's a complete example:
 ```python
+from llama_stack_client.types.agent_create_params import AgentConfig
+from llama_stack_client.lib.agents.agent import Agent
 # Configure agent with memory
 agent_config = AgentConfig(
-    model="Llama3.2-3B-Instruct",
+    model="meta-llama/Llama-3.2-3B-Instruct",
     instructions="You are a helpful assistant",
+    enable_session_persistence=False,
     toolgroups=[
         {
             "name": "builtin::rag",
@@ -105,10 +108,10 @@ response = agent.create_turn(
         {"role": "user", "content": "I am providing some documents for reference."}
     ],
     documents=[
-        dict(
-            content="https://raw.githubusercontent.com/example/doc.rst",
-            mime_type="text/plain",
-        )
+        {
+            "content": "https://raw.githubusercontent.com/example/doc.rst",
+            "mime_type": "text/plain",
+        }
     ],
     session_id=session_id,
 )
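
For reference, here is a minimal, self-contained sketch of the keyword-argument style these docs now use (the server URL and `vector_db_id` below are placeholders, and the vector DB is assumed to be registered already):

```python
from llama_stack_client import LlamaStackClient

# Placeholder endpoint and vector DB id; adjust to your own deployment.
client = LlamaStackClient(base_url="http://localhost:8321")
vector_db_id = "my_documents"

# Insert chunks with explicit keyword arguments, as in the updated docs.
chunks = [{"content": "Your document text here", "mime_type": "text/plain"}]
client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks)

# Low-level query against the vector store.
chunks_response = client.vector_io.query(
    vector_db_id=vector_db_id, query="What do you know about..."
)

# Higher-level RAG tool query: note the plural vector_db_ids and the content parameter.
results = client.tool_runtime.rag_tool.query(
    vector_db_ids=[vector_db_id], content="What do you know about..."
)
```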

@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # Dell Distribution of Llama Stack

@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # Fireworks Distribution
 ```{toctree}

@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # Meta Reference Distribution
 ```{toctree}

@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # Meta Reference Quantized Distribution
 ```{toctree}

@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # Ollama Distribution
 ```{toctree}

@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # Remote vLLM Distribution
 ```{toctree}
 :maxdepth: 2

@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # SambaNova Distribution
 ```{toctree}

@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # TGI Distribution

@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # Together Distribution
 ```{toctree}

@@ -2,7 +2,7 @@
 ```{admonition} News
 :class: tip
-Llama Stack 0.1.1 is now available! See the [release notes](https://github.com/meta-llama/llama-stack/releases/tag/v0.1.1) for more details.
+Llama Stack 0.1.2 is now available! See the [release notes](https://github.com/meta-llama/llama-stack/releases/tag/v0.1.2) for more details.
 ```
 # Llama Stack

@@ -22,9 +22,9 @@ class StackListProviders(Subcommand):
 self.parser.set_defaults(func=self._run_providers_list_cmd)
 def _add_arguments(self):
-from llama_stack.distribution.datatypes import Api
+from llama_stack.distribution.distribution import providable_apis
-api_values = [a.value for a in Api]
+api_values = [api.value for api in providable_apis()]
 self.parser.add_argument(
 "api",
 type=str,

@@ -55,6 +55,16 @@ class StackRun(Subcommand):
 default=[],
 metavar="KEY=VALUE",
 )
+self.parser.add_argument(
+"--tls-keyfile",
+type=str,
+help="Path to TLS key file for HTTPS",
+)
+self.parser.add_argument(
+"--tls-certfile",
+type=str,
+help="Path to TLS certificate file for HTTPS",
+)
 def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
 import importlib.resources
@@ -178,4 +188,7 @@ class StackRun(Subcommand):
 return
 run_args.extend(["--env", f"{key}={value}"])
+if args.tls_keyfile and args.tls_certfile:
+run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile])
 run_with_pty(run_args)

@@ -117,6 +117,23 @@ class Provider(BaseModel):
 config: Dict[str, Any]
+class ServerConfig(BaseModel):
+port: int = Field(
+default=8321,
+description="Port to listen on",
+ge=1024,
+le=65535,
+)
+tls_certfile: Optional[str] = Field(
+default=None,
+description="Path to TLS certificate file for HTTPS",
+)
+tls_keyfile: Optional[str] = Field(
+default=None,
+description="Path to TLS key file for HTTPS",
+)
 class StackRunConfig(BaseModel):
 version: str = LLAMA_STACK_RUN_CONFIG_VERSION
@@ -159,6 +176,11 @@ a default SQLite store will be used.""",
 eval_tasks: List[EvalTaskInput] = Field(default_factory=list)
 tool_groups: List[ToolGroupInput] = Field(default_factory=list)
+server: ServerConfig = Field(
+default_factory=ServerConfig,
+description="Configuration for the HTTP(S) server",
+)
 class BuildConfig(BaseModel):
 version: str = LLAMA_STACK_BUILD_CONFIG_VERSION
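
For orientation, a minimal sketch of what the new `server` block in a run.yaml could look like once TLS is enabled (the certificate and key paths are placeholders; HTTPS only switches on when both are set, and the same values can alternatively be passed to `llama stack run` via the new `--tls-keyfile`/`--tls-certfile` flags):

```yaml
# Hypothetical run.yaml excerpt; cert/key paths are placeholders.
server:
  port: 8321                       # default port from ServerConfig
  tls_certfile: /path/to/cert.pem  # optional; enables HTTPS together with tls_keyfile
  tls_keyfile: /path/to/key.pem
```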

@@ -17,17 +17,6 @@ from typing import Any, get_args, get_origin, Optional, TypeVar
import httpx
import yaml
from llama_stack_client import (
APIResponse,
AsyncAPIResponse,
AsyncLlamaStackClient,
AsyncStream,
LlamaStackClient,
NOT_GIVEN,
)
from pydantic import BaseModel, TypeAdapter
from rich.console import Console
from termcolor import cprint
from llama_stack.distribution.build import print_pip_install_help
from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
@@ -46,6 +35,17 @@ from llama_stack.providers.utils.telemetry.tracing import (
setup_logger,
start_trace,
)
from llama_stack_client import (
APIResponse,
AsyncAPIResponse,
AsyncLlamaStackClient,
AsyncStream,
LlamaStackClient,
NOT_GIVEN,
)
from pydantic import BaseModel, TypeAdapter
from rich.console import Console
from termcolor import cprint
 T = TypeVar("T")
@@ -198,6 +198,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
 async def initialize(self) -> bool:
 try:
+self.endpoint_impls = None
 self.impls = await construct_stack(self.config, self.custom_provider_registry)
 except ModuleNotFoundError as _e:
 cprint(_e.msg, "red")
@@ -213,7 +214,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
 f"Please run:\n\n{prefix}llama stack build --template {self.config_path_or_template_name} --image-type venv\n\n",
 "yellow",
 )
-return False
+raise _e
 if Api.telemetry in self.impls:
 setup_logger(self.impls[Api.telemetry])

@@ -282,8 +282,19 @@ def main():
 action="append",
 help="Environment variables in KEY=value format. Can be specified multiple times.",
 )
+parser.add_argument(
+"--tls-keyfile",
+help="Path to TLS key file for HTTPS",
+required="--tls-certfile" in sys.argv,
+)
+parser.add_argument(
+"--tls-certfile",
+help="Path to TLS certificate file for HTTPS",
+required="--tls-keyfile" in sys.argv,
+)
 args = parser.parse_args()
 if args.env:
 for env_pair in args.env:
 try:
@@ -381,11 +392,36 @@ def main():
 import uvicorn
-# FYI this does not do hot-reloads
+# Configure SSL if certificates are provided
port = args.port or config.server.port
ssl_config = None
if args.tls_keyfile:
keyfile = args.tls_keyfile
certfile = args.tls_certfile
else:
keyfile = config.server.tls_keyfile
certfile = config.server.tls_certfile
if keyfile and certfile:
ssl_config = {
"ssl_keyfile": keyfile,
"ssl_certfile": certfile,
}
print(f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}")
 listen_host = ["::", "0.0.0.0"] if not args.disable_ipv6 else "0.0.0.0"
-print(f"Listening on {listen_host}:{args.port}")
+print(f"Listening on {listen_host}:{port}")
-uvicorn.run(app, host=listen_host, port=args.port)
uvicorn_config = {
"app": app,
"host": listen_host,
"port": port,
}
if ssl_config:
uvicorn_config.update(ssl_config)
uvicorn.run(**uvicorn_config)
def extract_path_params(route: str) -> List[str]:

@@ -34,6 +34,7 @@ shift
 # Process environment variables from --env arguments
 env_vars=""
+other_args=""
 while [[ $# -gt 0 ]]; do
 case "$1" in
 --env)
@@ -48,6 +49,7 @@ while [[ $# -gt 0 ]]; do
 fi
 ;;
 *)
+other_args="$other_args $1"
 shift
 ;;
 esac
@@ -61,4 +63,5 @@ $CONDA_PREFIX/bin/python \
 -m llama_stack.distribution.server.server \
 --yaml-config "$yaml_config" \
 --port "$port" \
-$env_vars
+$env_vars \
+$other_args

@@ -40,8 +40,12 @@ shift
 port="$1"
 shift
+# Initialize other_args
+other_args=""
 # Process environment variables from --env arguments
 env_vars=""
 while [[ $# -gt 0 ]]; do
 case "$1" in
 --env)
@@ -55,6 +59,7 @@ while [[ $# -gt 0 ]]; do
 fi
 ;;
 *)
+other_args="$other_args $1"
 shift
 ;;
 esac
@@ -93,5 +98,8 @@ $CONTAINER_BINARY run $CONTAINER_OPTS -it \
 -v "$yaml_config:/app/config.yaml" \
 $mounts \
 --env LLAMA_STACK_PORT=$port \
---entrypoint='["python", "-m", "llama_stack.distribution.server.server", "--yaml-config", "/app/config.yaml"]' \
+--entrypoint python \
-$container_image:$version_tag
+$container_image:$version_tag \
+-m llama_stack.distribution.server.server \
+--yaml-config /app/config.yaml \
+$other_args

@@ -67,7 +67,6 @@ def generate_bwrap_command(bind_dirs: List[str]) -> str:
 @dataclass
 class CodeExecutionContext:
 matplotlib_dump_dir: str
-use_proxy: bool = False
 @dataclass

@@ -26,6 +26,7 @@ from llama_stack.apis.inference import (
 Message,
 ResponseFormat,
 ToolChoice,
+ToolConfig,
 )
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
 from llama_stack.providers.remote.inference.groq.config import GroqConfig

@@ -352,24 +352,20 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
 return EmbeddingsResponse(embeddings=embeddings)
 async def register_model(self, model: Model) -> Model:
-# ollama does not have embedding models running. Check if the model is in list of available models.
-if model.model_type == ModelType.embedding:
-response = await self.client.list()
+async def check_model_availability(model_id: str):
+response = await self.client.ps()
 available_models = [m["model"] for m in response["models"]]
-if model.provider_resource_id not in available_models:
+if model_id not in available_models:
 raise ValueError(
-f"Model '{model.provider_resource_id}' is not available in Ollama. "
-f"Available models: {', '.join(available_models)}"
+f"Model '{model_id}' is not available in Ollama. Available models: {', '.join(available_models)}"
 )
+if model.model_type == ModelType.embedding:
+await check_model_availability(model.provider_resource_id)
 return model
 model = await self.register_helper.register_model(model)
-models = await self.client.ps()
-available_models = [m["model"] for m in models["models"]]
-if model.provider_resource_id not in available_models:
-raise ValueError(
-f"Model '{model.provider_resource_id}' is not available in Ollama. "
-f"Available models: {', '.join(available_models)}"
-)
+await check_model_availability(model.provider_resource_id)
 return model

@@ -12,8 +12,8 @@ from .config import QdrantConfig
 async def get_adapter_impl(config: QdrantConfig, deps: Dict[Api, ProviderSpec]):
-from .qdrant import QdrantVectorMemoryAdapter
+from .qdrant import QdrantVectorDBAdapter
-impl = QdrantVectorMemoryAdapter(config, deps[Api.inference])
+impl = QdrantVectorDBAdapter(config, deps[Api.inference])
 await impl.initialize()
 return impl

@@ -55,7 +55,7 @@ class QdrantIndex(EmbeddingIndex):
 points = []
 for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
-chunk_id = f"{chunk.document_id}:chunk-{i}"
+chunk_id = f"{chunk.metadata['document_id']}:chunk-{i}"
 points.append(
 PointStruct(
 id=convert_id(chunk_id),
@@ -93,6 +93,9 @@ class QdrantIndex(EmbeddingIndex):
 return QueryChunksResponse(chunks=chunks, scores=scores)
+async def delete(self):
+await self.client.delete_collection(collection_name=self.collection_name)
 class QdrantVectorDBAdapter(VectorIO, VectorDBsProtocolPrivate):
 def __init__(self, config: QdrantConfig, inference_api: Api.inference) -> None:

@@ -95,7 +95,7 @@ class TestDatasetIO:
 assert len(response) == 1
 assert response[0].identifier == "test_dataset"
-with pytest.raises(Exception) as exc_info:
+with pytest.raises(ValueError):
 # unregister a dataset that does not exist
 await datasets_impl.unregister_dataset("test_dataset2")
@@ -104,7 +104,7 @@
 assert isinstance(response, list)
 assert len(response) == 0
-with pytest.raises(Exception) as exc_info:
+with pytest.raises(ValueError):
 await datasets_impl.unregister_dataset("test_dataset")
 @pytest.mark.asyncio

@@ -32,7 +32,7 @@ class TestModelRegistration:
 )
 # Try to register a model that's too large for local inference
-with pytest.raises(ValueError) as exc_info:
+with pytest.raises(ValueError):
 await models_impl.register_model(
 model_id="Llama3.1-70B-Instruct",
 )
@@ -42,7 +42,7 @@
 _, models_impl = inference_stack
 # Try to register a non-existent model
-with pytest.raises(Exception) as exc_info:
+with pytest.raises(ValueError):
 await models_impl.register_model(
 model_id="Llama3-NonExistent-Model",
 )
@@ -59,7 +59,7 @@
 },
 )
-with pytest.raises(ValueError) as exc_info:
+with pytest.raises(ValueError):
 await models_impl.register_model(
 model_id="custom-model-2",
 metadata={
@@ -88,7 +88,7 @@ class TestModelRegistration:
 async def test_register_with_invalid_llama_model(self, inference_stack):
 _, models_impl = inference_stack
-with pytest.raises(ValueError) as exc_info:
+with pytest.raises(ValueError):
 await models_impl.register_model(
 model_id="custom-model-2",
 metadata={"llama_model": "invalid-llama-model"},

@@ -4,12 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+import base64
 from pathlib import Path
 import pytest
-from llama_stack.apis.common.content_types import ImageContentItem, TextContentItem, URL
+from llama_stack.apis.common.content_types import URL, ImageContentItem, TextContentItem
 from llama_stack.apis.inference import (
 ChatCompletionResponse,
 ChatCompletionResponseEventType,
@@ -23,7 +23,7 @@ from .utils import group_chunks
 THIS_DIR = Path(__file__).parent
 with open(THIS_DIR / "pasta.jpeg", "rb") as f:
-PASTA_IMAGE = f.read()
+PASTA_IMAGE = base64.b64encode(f.read()).decode("utf-8")
 class TestVisionModelInference:

@@ -29,7 +29,7 @@ def find_template_dirs(templates_dir: Path) -> Iterator[Path]:
 if not templates_dir.exists():
 raise FileNotFoundError(f"Templates directory not found: {templates_dir}")
-return (d for d in templates_dir.iterdir() if d.is_dir() and d.name != "__pycache__")
+return sorted(d for d in templates_dir.iterdir() if d.is_dir() and d.name != "__pycache__")
 def process_template(template_dir: Path, progress) -> None:

@@ -115,3 +115,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -117,3 +117,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -116,3 +116,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -107,3 +107,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -172,3 +172,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -161,3 +161,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -124,3 +124,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -114,3 +114,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -124,3 +124,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -114,3 +114,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -126,3 +126,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -115,3 +115,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -117,3 +117,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -147,3 +147,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -16,7 +16,7 @@ The `llamastack/distribution-{{ name }}` distribution consists of the following
 You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.
-{%- if run_config_env_vars %}
+{% if run_config_env_vars %}
 ### Environment Variables
 The following environment variables can be configured:

@@ -121,3 +121,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -110,3 +110,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -126,3 +126,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -115,3 +115,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -126,3 +126,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -131,8 +131,15 @@ class DistributionTemplate(BaseModel):
 providers_str = ", ".join(f"`{p}`" for p in providers)
 providers_table += f"| {api} | {providers_str} |\n"
-template = "<!-- This file was auto-generated by distro_codegen.py, please edit source -->\n"
-template += self.template_path.read_text()
+template = self.template_path.read_text()
+comment = "<!-- This file was auto-generated by distro_codegen.py, please edit source -->\n"
+orphantext = "---\norphan: true\n---\n"
+if template.startswith(orphantext):
+template = template.replace(orphantext, orphantext + comment)
+else:
+template = comment + template
 # Render template with rich-generated table
 env = jinja2.Environment(
 trim_blocks=True,

@@ -114,3 +114,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -113,3 +113,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -167,3 +167,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -156,3 +156,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -117,3 +117,5 @@ tool_groups:
 provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
 provider_id: code-interpreter
+server:
+  port: 8321

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "llama_stack"
-version = "0.1.1"
+version = "0.1.2"
 authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
 description = "Llama Stack"
 readme = "README.md"
@@ -25,8 +25,8 @@ dependencies = [
 "fire",
 "httpx",
 "huggingface-hub",
-"llama-models>=0.1.1",
-"llama-stack-client>=0.1.1",
+"llama-models>=0.1.2",
+"llama-stack-client>=0.1.2",
 "prompt-toolkit",
 "python-dotenv",
 "pydantic>=2",

@@ -4,6 +4,7 @@ annotated-types==0.7.0
 anyio==4.8.0
 blobfile==3.0.0
 certifi==2025.1.31
+chardet==5.2.0
 charset-normalizer==3.4.1
 click==8.1.8
 colorama==0.4.6 ; sys_platform == 'win32'
@@ -18,8 +19,8 @@ httpx==0.28.1
 huggingface-hub==0.28.1
 idna==3.10
 jinja2==3.1.5
-llama-models==0.1.1
-llama-stack-client==0.1.1
+llama-models==0.1.2
+llama-stack-client==0.1.2
 lxml==5.3.0
 markdown-it-py==3.0.0
 markupsafe==3.0.2
@@ -34,6 +35,7 @@ pycryptodomex==3.21.0
 pydantic==2.10.6
 pydantic-core==2.27.2
 pygments==2.19.1
+pypdf==5.2.0
 python-dateutil==2.9.0.post0
 python-dotenv==1.0.1
 pytz==2025.1

@@ -4,18 +4,18 @@ You can run llama stack integration tests on either a Llama Stack Library or a L
 To test on a Llama Stack library with certain configuration, run
 ```bash
 LLAMA_STACK_CONFIG=./llama_stack/templates/cerebras/run.yaml
-pytest -s -v tests/client-sdk/inference/test_inference.py
+pytest -s -v tests/client-sdk/inference/
 ```
 or just the template name
 ```bash
 LLAMA_STACK_CONFIG=together
-pytest -s -v tests/client-sdk/inference/test_inference.py
+pytest -s -v tests/client-sdk/inference/
 ```
 To test on a Llama Stack endpoint, run
 ```bash
 LLAMA_STACK_BASE_URL=http://localhost:8089
-pytest -s -v tests/client-sdk/inference/test_inference.py
+pytest -s -v tests/client-sdk/inference
 ```
 ## Report Generation

@@ -263,12 +263,14 @@ def test_custom_tool(llama_stack_client, agent_config):
 assert "CustomTool" in logs_str
-def test_override_system_message_behavior(llama_stack_client, agent_config):
+# TODO: fix this flaky test
+def xtest_override_system_message_behavior(llama_stack_client, agent_config):
 client_tool = TestClientTool()
 agent_config = {
 **agent_config,
 "instructions": "You are a pirate",
 "client_tools": [client_tool.get_tool_definition()],
+"model": "meta-llama/Llama-3.2-3B-Instruct",
 }
 agent = Agent(llama_stack_client, agent_config, client_tools=(client_tool,))

@@ -4,9 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-import base64
-import pathlib
 import pytest
 from pydantic import BaseModel
@@ -14,6 +11,7 @@ PROVIDER_TOOL_PROMPT_FORMAT = {
 "remote::ollama": "json",
 "remote::together": "json",
 "remote::fireworks": "json",
+"remote::vllm": "json",
 }
 PROVIDER_LOGPROBS_TOP_K = set(
@@ -56,23 +54,6 @@ def get_weather_tool_definition():
 }
@pytest.fixture
def image_path():
return pathlib.Path(__file__).parent / "dog.png"
@pytest.fixture
def base64_image_data(image_path):
# Convert the image to base64
return base64.b64encode(image_path.read_bytes()).decode("utf-8")
@pytest.fixture
def base64_image_url(base64_image_data, image_path):
# suffix includes the ., so we remove it
return f"data:image/{image_path.suffix[1:]};base64,{base64_image_data}"
 def test_text_completion_non_streaming(llama_stack_client, text_model_id):
 response = llama_stack_client.inference.completion(
 content="Complete the sentence using one word: Roses are red, violets are ",
@@ -176,8 +157,8 @@ def test_text_completion_structured_output(llama_stack_client, text_model_id, in
 @pytest.mark.parametrize(
 "question,expected",
 [
-("What are the names of planets in our solar system?", "Earth"),
-("What are the names of the planets that have rings around them?", "Saturn"),
+("Which planet do humans live on?", "Earth"),
+("Which planet has rings around it with a name starting with letter S?", "Saturn"),
 ],
 )
 def test_text_chat_completion_non_streaming(llama_stack_client, text_model_id, question, expected):
@@ -299,101 +280,3 @@ def test_text_chat_completion_structured_output(llama_stack_client, text_model_i
 assert answer.last_name == "Jordan"
 assert answer.year_of_birth == 1963
 assert answer.num_seasons_in_nba == 15
def test_image_chat_completion_non_streaming(llama_stack_client, vision_model_id):
message = {
"role": "user",
"content": [
{
"type": "image",
"image": {
"url": {
# TODO: Replace with Github based URI to resources/sample1.jpg
"uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
},
},
},
{
"type": "text",
"text": "Describe what is in this image.",
},
],
}
response = llama_stack_client.inference.chat_completion(
model_id=vision_model_id,
messages=[message],
stream=False,
)
message_content = response.completion_message.content.lower().strip()
assert len(message_content) > 0
assert any(expected in message_content for expected in {"dog", "puppy", "pup"})
def test_image_chat_completion_streaming(llama_stack_client, vision_model_id):
message = {
"role": "user",
"content": [
{
"type": "image",
"image": {
"url": {
# TODO: Replace with Github based URI to resources/sample1.jpg
"uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
},
},
},
{
"type": "text",
"text": "Describe what is in this image.",
},
],
}
response = llama_stack_client.inference.chat_completion(
model_id=vision_model_id,
messages=[message],
stream=True,
)
streamed_content = ""
for chunk in response:
streamed_content += chunk.event.delta.text.lower()
assert len(streamed_content) > 0
assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})
@pytest.mark.parametrize("type_", ["url", "data"])
def test_image_chat_completion_base64(llama_stack_client, vision_model_id, base64_image_data, base64_image_url, type_):
image_spec = {
"url": {
"type": "image",
"image": {
"url": {
"uri": base64_image_url,
},
},
},
"data": {
"type": "image",
"image": {
"data": base64_image_data,
},
},
}[type_]
message = {
"role": "user",
"content": [
image_spec,
{
"type": "text",
"text": "Describe what is in this image.",
},
],
}
response = llama_stack_client.inference.chat_completion(
model_id=vision_model_id,
messages=[message],
stream=False,
)
message_content = response.completion_message.content.lower().strip()
assert len(message_content) > 0

@@ -0,0 +1,133 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import base64
import pathlib
import pytest
@pytest.fixture(scope="session")
def inference_provider_type(llama_stack_client):
providers = llama_stack_client.providers.list()
inference_providers = [p for p in providers if p.api == "inference"]
assert len(inference_providers) > 0, "No inference providers found"
return inference_providers[0].provider_type
@pytest.fixture
def image_path():
return pathlib.Path(__file__).parent / "dog.png"
@pytest.fixture
def base64_image_data(image_path):
# Convert the image to base64
return base64.b64encode(image_path.read_bytes()).decode("utf-8")
@pytest.fixture
def base64_image_url(base64_image_data, image_path):
# suffix includes the ., so we remove it
return f"data:image/{image_path.suffix[1:]};base64,{base64_image_data}"
def test_image_chat_completion_non_streaming(llama_stack_client, vision_model_id):
message = {
"role": "user",
"content": [
{
"type": "image",
"image": {
"url": {
# TODO: Replace with Github based URI to resources/sample1.jpg
"uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
},
},
},
{
"type": "text",
"text": "Describe what is in this image.",
},
],
}
response = llama_stack_client.inference.chat_completion(
model_id=vision_model_id,
messages=[message],
stream=False,
)
message_content = response.completion_message.content.lower().strip()
assert len(message_content) > 0
assert any(expected in message_content for expected in {"dog", "puppy", "pup"})
def test_image_chat_completion_streaming(llama_stack_client, vision_model_id):
message = {
"role": "user",
"content": [
{
"type": "image",
"image": {
"url": {
# TODO: Replace with Github based URI to resources/sample1.jpg
"uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
},
},
},
{
"type": "text",
"text": "Describe what is in this image.",
},
],
}
response = llama_stack_client.inference.chat_completion(
model_id=vision_model_id,
messages=[message],
stream=True,
)
streamed_content = ""
for chunk in response:
streamed_content += chunk.event.delta.text.lower()
assert len(streamed_content) > 0
assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})
@pytest.mark.parametrize("type_", ["url", "data"])
def test_image_chat_completion_base64(llama_stack_client, vision_model_id, base64_image_data, base64_image_url, type_):
image_spec = {
"url": {
"type": "image",
"image": {
"url": {
"uri": base64_image_url,
},
},
},
"data": {
"type": "image",
"image": {
"data": base64_image_data,
},
},
}[type_]
message = {
"role": "user",
"content": [
image_spec,
{
"type": "text",
"text": "Describe what is in this image.",
},
],
}
response = llama_stack_client.inference.chat_completion(
model_id=vision_model_id,
messages=[message],
stream=False,
)
message_content = response.completion_message.content.lower().strip()
assert len(message_content) > 0

uv.lock (generated)

@@ -687,7 +687,7 @@ wheels = [
 [[package]]
 name = "llama-models"
-version = "0.1.1"
+version = "0.1.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
 { name = "jinja2" },
@@ -696,14 +696,14 @@ dependencies = [
 { name = "pyyaml" },
 { name = "tiktoken" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/df/80/4a4595cf5e55f71c0e15b85ff2f4c04b0742bf664ede062a09c9d383bf7b/llama_models-0.1.1.tar.gz", hash = "sha256:7cb5a9fe38485b47aff4c93e183d6d390a676a7619f3355502576b652f17733a", size = 1608412 }
+sdist = { url = "https://files.pythonhosted.org/packages/b5/f2/ed8310d4677cd38ab45ffba45aea2a4e9882b640045ad9c3198ac69e5a85/llama_models-0.1.2.tar.gz", hash = "sha256:1266eaec7a8db336e4ed034d2b494189ccb7fd6d6b7aefe874eee749a4340b9b", size = 1608069 }
 wheels = [
-{ url = "https://files.pythonhosted.org/packages/d9/93/d49dd0f0cd37df1a7a7fb25444d010f626cdf42b21eea11d839b0f6a808a/llama_models-0.1.1-py3-none-any.whl", hash = "sha256:7e4f15dc4f6f011852ea2c42f9770b75140f5eca670b32cc67fc0a4605c55f89", size = 1650981 },
+{ url = "https://files.pythonhosted.org/packages/55/a7/34b9e88ef4109759c8881f43b8006139e3d13d54c440b8c571b253655f54/llama_models-0.1.2-py3-none-any.whl", hash = "sha256:8aa5287d1c6325698991ff677e71148cac347e07493bb5b3ab891e614b89e1f8", size = 1651273 },
 ]
 [[package]]
 name = "llama-stack"
-version = "0.1.1"
+version = "0.1.2"
 source = { editable = "." }
 dependencies = [
 { name = "blobfile" },
@@ -751,8 +751,8 @@ requires-dist = [
 { name = "fire" },
 { name = "httpx" },
 { name = "huggingface-hub" },
-{ name = "llama-models", specifier = ">=0.1.1" },
-{ name = "llama-stack-client", specifier = ">=0.1.1" },
+{ name = "llama-models", specifier = ">=0.1.2" },
+{ name = "llama-stack-client", specifier = ">=0.1.2" },
 { name = "myst-parser", marker = "extra == 'docs'" },
 { name = "nbval", marker = "extra == 'dev'" },
 { name = "pre-commit", marker = "extra == 'dev'" },
@@ -780,7 +780,7 @@ requires-dist = [
 [[package]]
 name = "llama-stack-client"
-version = "0.1.1"
+version = "0.1.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
 { name = "anyio" },
@@ -797,9 +797,9 @@ dependencies = [
 { name = "tqdm" },
 { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/07/42/7004958ac1a6da9a8060decf0d9120fdeb3b2775de090a0a473f2ee4a27d/llama_stack_client-0.1.1.tar.gz", hash = "sha256:3e549a848ade959d342fa52ec49b1913b7bb615a77b5b8dcaefe6ff94409049e", size = 179729 }
+sdist = { url = "https://files.pythonhosted.org/packages/9e/75/8b41a3026c871a8650cd8d2cfda9f891a9163458813574f36518bb40afe4/llama_stack_client-0.1.2.tar.gz", hash = "sha256:94277ddae52be557d771dcdc15d85af9012b5aa87439dd69ec1dc0ff486b0c8e", size = 188023 }
 wheels = [
-{ url = "https://files.pythonhosted.org/packages/80/66/5255c09dc001ff437fd6fe6fad27142035b60073df243f7df0494095f605/llama_stack_client-0.1.1-py3-none-any.whl", hash = "sha256:e07d58fdcc1eaa370dd00b94c2dd1a8169c0ac60c37f6f2772cbc2c5b63f2e62", size = 348665 },
+{ url = "https://files.pythonhosted.org/packages/c4/32/3a3a97eecff1f1e3a1dc90e9b00681abea11ec4f43a7ca549981261e18b6/llama_stack_client-0.1.2-py3-none-any.whl", hash = "sha256:85ff0fb57a62d7d0470cfaa2b07a595c9fb3483297944d5e5a066db850d38ccd", size = 359415 },
 ]
 [[package]]