Merge branch 'main' into fiddlecube-guard

Kaushik Srinivasan 2025-02-10 18:14:45 -08:00 committed by GitHub
commit 42d6e7e4a1
69 changed files with 721 additions and 367 deletions

View file

@ -23,3 +23,7 @@ jobs:
.pre-commit-config.yaml
- uses: pre-commit/action@v3.0.1
- name: Verify if there are any diff files after pre-commit
run: |
git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)

View file

@ -54,7 +54,7 @@ jobs:
echo "REPORT_FILE=${REPORT_OUTPUT}" >> "$GITHUB_ENV"
export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
LLAMA_STACK_CONFIG=./llama_stack/templates/${{ matrix.provider }}/run.yaml pytest --md-report --md-report-verbose=1 ./tests/client-sdk/inference/test_inference.py --md-report-output "$REPORT_OUTPUT"
LLAMA_STACK_CONFIG=./llama_stack/templates/${{ matrix.provider }}/run.yaml pytest --md-report --md-report-verbose=1 ./tests/client-sdk/inference/ --md-report-output "$REPORT_OUTPUT"
- name: Output reports to the job summary
if: always()

View file

@ -48,6 +48,7 @@ repos:
hooks:
- id: uv-export
args: ["--frozen", "--no-hashes", "--no-emit-project"]
- id: uv-sync
# - repo: https://github.com/pre-commit/mirrors-mypy
# rev: v1.14.0

View file

@ -1,44 +0,0 @@
# Changelog
## 0.2.0
### Added
### Changed
### Removed
## 0.0.53
### Added
- Resource-oriented design for models, shields, memory banks, datasets and eval tasks
- Persistence for registered objects with distribution
- Ability to persist memory banks created for FAISS
- PostgreSQL KVStore implementation
- Environment variable placeholder support in run.yaml files
- Comprehensive Zero-to-Hero notebooks and quickstart guides
- Support for quantized models in Ollama
- Vision models support for Together, Fireworks, Meta-Reference, and Ollama, and vLLM
- Bedrock distribution with safety shields support
- Evals API with task registration and scoring functions
- MMLU and SimpleQA benchmark scoring functions
- Huggingface dataset provider integration for benchmarks
- Support for custom dataset registration from local paths
- Benchmark evaluation CLI tools with visualization tables
- RAG evaluation scoring functions and metrics
- Local persistence for datasets and eval tasks
### Changed
- Split safety into distinct providers (llama-guard, prompt-guard, code-scanner)
- Changed provider naming convention (`impls``inline`, `adapters``remote`)
- Updated API signatures for dataset and eval task registration
- Restructured folder organization for providers
- Enhanced Docker build configuration
- Added version prefixing for REST API routes
- Enhanced evaluation task registration workflow
- Improved benchmark evaluation output formatting
- Restructured evals folder organization for better modularity
### Removed
- `llama stack configure` command

View file

@ -34,22 +34,22 @@ By reducing friction and complexity, Llama Stack empowers developers to focus on
### API Providers
Here is a list of the various API providers and available distributions that can help developers get started easily,
| **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** |
|:------------------------------------------------------------------------------------------:|:----------------------:|:------------------:|:------------------:|:------------------:|:------------------:|:------------------:|
| Meta Reference | Single Node | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
| SambaNova | Hosted | | :heavy_check_mark: | | | |
| Cerebras | Hosted | | :heavy_check_mark: | | | |
| Fireworks | Hosted | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | |
| AWS Bedrock | Hosted | | :heavy_check_mark: | | :heavy_check_mark: | |
| Together | Hosted | :heavy_check_mark: | :heavy_check_mark: | | :heavy_check_mark: | |
| Groq | Hosted | | :heavy_check_mark: | | | |
| Ollama | Single Node | | :heavy_check_mark: | | | |
| TGI | Hosted and Single Node | | :heavy_check_mark: | | | |
| NVIDIA NIM | Hosted and Single Node | | :heavy_check_mark: | | | |
| Chroma | Single Node | | | :heavy_check_mark: | | |
| PG Vector | Single Node | | | :heavy_check_mark: | | |
| PyTorch ExecuTorch | On-device iOS | :heavy_check_mark: | :heavy_check_mark: | | | |
| vLLM | Hosted and Single Node | | :heavy_check_mark: | | | |
| **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** |
|:------------------------:|:----------------------:|:----------:|:-------------:|:----------:|:----------:|:-------------:|
| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ |
| SambaNova | Hosted | | ✅ | | | |
| Cerebras | Hosted | | ✅ | | | |
| Fireworks | Hosted | ✅ | ✅ | ✅ | | |
| AWS Bedrock | Hosted | | ✅ | | ✅ | |
| Together | Hosted | ✅ | ✅ | | ✅ | |
| Groq | Hosted | | ✅ | | | |
| Ollama | Single Node | | ✅ | | | |
| TGI | Hosted and Single Node | | ✅ | | | |
| NVIDIA NIM | Hosted and Single Node | | ✅ | | | |
| Chroma | Single Node | | | ✅ | | |
| PG Vector | Single Node | | | ✅ | | |
| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | |
| vLLM | Hosted and Single Node | | ✅ | | | |
### Distributions

View file

@ -69,6 +69,40 @@
"fiddlecube": [
"httpx"
],
"dell": [
"aiohttp",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"fireworks": [
"aiosqlite",
"autoevals",
@ -255,6 +289,38 @@
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"nvidia": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"ollama": [
"aiohttp",
"aiosqlite",
@ -322,6 +388,36 @@
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"sambanova": [
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"tgi": [
"aiohttp",
"aiosqlite",
@ -424,101 +520,5 @@
"vllm",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"nvidia": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"sambanova": [
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"dell": [
"aiohttp",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
]
}

docs/conftest.py (new file, +9 lines)
View file

@ -0,0 +1,9 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
def pytest_collection_modifyitems(items):
for item in items:
item.name = item.name.replace(' ', '_')

View file

@ -86,7 +86,6 @@
"# NBVAL_SKIP\n",
"\n",
"!apt-get install -y bubblewrap\n",
"# install a branch of llama stack\n",
"import os\n",
"os.environ[\"UV_SYSTEM_PYTHON\"] = \"1\"\n",
"!pip install uv\n",
@ -3397,6 +3396,231 @@
"response = client.scoring.score(input_rows=rows, scoring_functions=scoring_params)\n",
"pprint(response)\n"
]
},
{
"cell_type": "markdown",
"id": "ad077440",
"metadata": {},
"source": [
"## 4. Image Understanding with Llama 3.2\n",
"\n",
"Below is a complete example of using Together's Llama Stack 0.1 server at https://llama-stack.together.ai to ask Llama 3.2 questions about an image."
]
},
{
"cell_type": "markdown",
"id": "82e381ec",
"metadata": {},
"source": [
"### 4.1 Setup and helpers\n",
"\n",
"Below we install the Llama Stack client 0.1, download the example image, define two image helpers, and set Llama Stack Together server URL and Llama 3.2 model name.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "865fc5a8",
"metadata": {},
"outputs": [],
"source": [
"!pip install llama-stack-client==0.1.0"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "44e05e16",
"metadata": {},
"outputs": [],
"source": [
"!wget https://raw.githubusercontent.com/meta-llama/llama-models/refs/heads/main/Llama_Repo.jpeg"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "469750f7",
"metadata": {},
"outputs": [],
"source": [
"from PIL import Image\n",
"import matplotlib.pyplot as plt\n",
"\n",
"def display_image(path):\n",
" img = Image.open(path)\n",
" plt.imshow(img)\n",
" plt.axis('off')\n",
" plt.show()\n",
"\n",
"display_image(\"Llama_Repo.jpeg\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a2c1e1c2",
"metadata": {},
"outputs": [],
"source": [
"import base64\n",
"\n",
"def encode_image(image_path):\n",
" with open(image_path, \"rb\") as image_file:\n",
" base64_string = base64.b64encode(image_file.read()).decode(\"utf-8\")\n",
" base64_url = f\"data:image/png;base64,{base64_string}\"\n",
" return base64_url"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c565f99e",
"metadata": {},
"outputs": [],
"source": [
"from llama_stack_client import LlamaStackClient\n",
"\n",
"LLAMA_STACK_API_TOGETHER_URL=\"https://llama-stack.together.ai\"\n",
"LLAMA32_11B_INSTRUCT = \"meta-llama/Llama-3.2-11B-Vision-Instruct\""
]
},
{
"cell_type": "markdown",
"id": "7737cd41",
"metadata": {},
"source": [
"### 4.2 Using Llama Stack Chat API\n",
"\n",
"The code below uses the Llama Stack 0.1's chat API to interact with Llama 3.2:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d7914894",
"metadata": {},
"outputs": [],
"source": [
"from llama_stack_client.lib.inference.event_logger import EventLogger\n",
"\n",
"async def run_main(image_path: str, prompt):\n",
" client = LlamaStackClient(\n",
" base_url=LLAMA_STACK_API_TOGETHER_URL,\n",
" )\n",
"\n",
" message = {\n",
" \"role\": \"user\",\n",
" \"content\": [\n",
" {\n",
" \"type\": \"image\",\n",
" \"image\": {\n",
" \"url\": {\n",
" \"uri\": encode_image(image_path)\n",
" }\n",
" }\n",
" },\n",
" {\n",
" \"type\": \"text\",\n",
" \"text\": prompt,\n",
" }\n",
" ]\n",
" }\n",
"\n",
" response = client.inference.chat_completion(\n",
" messages=[message],\n",
" model_id=LLAMA32_11B_INSTRUCT,\n",
" stream=False,\n",
" )\n",
"\n",
" print(response.completion_message.content.lower().strip())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ee09b97",
"metadata": {},
"outputs": [],
"source": [
"await run_main(\"Llama_Repo.jpeg\",\n",
" \"How many different colors are those llamas?\\\n",
" What are those colors?\")"
]
},
{
"cell_type": "markdown",
"id": "e741d7b9",
"metadata": {},
"source": [
"### 4.3 Using Llama Stack Agent API\n",
"\n",
"The code below uses the Llama Stack 0.1's Agent API to interact with Llama 3.2:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f9a83275",
"metadata": {},
"outputs": [],
"source": [
"from llama_stack_client.lib.agents.agent import Agent\n",
"from llama_stack_client.lib.agents.event_logger import EventLogger\n",
"from llama_stack_client.types.agent_create_params import AgentConfig\n",
"\n",
"async def run_main(image_path, prompt):\n",
" base64_image = encode_image(image_path)\n",
"\n",
" client = LlamaStackClient(\n",
" base_url=LLAMA_STACK_API_TOGETHER_URL,\n",
" )\n",
"\n",
" agent_config = AgentConfig(\n",
" model=LLAMA32_11B_INSTRUCT,\n",
" instructions=\"You are a helpful assistant\",\n",
" enable_session_persistence=False,\n",
" )\n",
"\n",
" agent = Agent(client, agent_config)\n",
" session_id = agent.create_session(\"test-session\")\n",
"\n",
" response = agent.create_turn(\n",
" messages=[{\n",
" \"role\": \"user\",\n",
" \"content\": [\n",
" {\n",
" \"type\": \"image\",\n",
" \"image\": {\n",
" \"url\": {\n",
" \"uri\": encode_image(image_path)\n",
" }\n",
" }\n",
" },\n",
" {\n",
" \"type\": \"text\",\n",
" \"text\": prompt,\n",
" }\n",
" ]\n",
" }],\n",
" session_id=session_id,\n",
" )\n",
"\n",
" for log in EventLogger().log(response):\n",
" log.print()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15d0098b",
"metadata": {},
"outputs": [],
"source": [
"await run_main(\"Llama_Repo.jpeg\",\n",
" \"How many different colors are those llamas?\\\n",
" What are those colors?\")"
]
}
],
"metadata": {

View file

@ -4,7 +4,7 @@ Llama Stack provides all the building blocks needed to create sophisticated AI applications.
The best way to get started is to look at this notebook, which walks through the various APIs (from basic inference to RAG agents) and how to use them.
**Notebook**: [Building AI Applications](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb)
**Notebook**: [Building AI Applications](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)
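For a quick feel of the client API before diving into the notebook, here is a minimal sketch (assuming a stack server running locally on the default port 8321, the `llama-stack-client` package installed, and an 8B Instruct model registered):

```python
from llama_stack_client import LlamaStackClient

# Assumes a Llama Stack server is already running on the default port.
client = LlamaStackClient(base_url="http://localhost:8321")

response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # any registered model id works
    messages=[{"role": "user", "content": "What can I build with Llama Stack?"}],
    stream=False,
)
print(response.completion_message.content)
```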
Here are some key topics that will help you build effective agents:

View file

@ -36,13 +36,12 @@ chunks = [
"content": "Your document text here",
"mime_type": "text/plain",
},
...,
]
client.vector_io.insert(vector_db_id, chunks)
client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks)
# You can then query for these chunks
chunks_response = client.vector_io.query(
vector_db_id, query="What do you know about..."
vector_db_id=vector_db_id, query="What do you know about..."
)
```
@ -72,8 +71,8 @@ client.tool_runtime.rag_tool.insert(
# Query documents
results = client.tool_runtime.rag_tool.query(
vector_db_id=vector_db_id,
query="What do you know about...",
vector_db_ids=[vector_db_id],
content="What do you know about...",
)
```
@ -82,10 +81,14 @@ results = client.tool_runtime.rag_tool.query(
One of the most powerful patterns is combining agents with RAG capabilities. Here's a complete example:
```python
from llama_stack_client.types.agent_create_params import AgentConfig
from llama_stack_client.lib.agents.agent import Agent
# Configure agent with memory
agent_config = AgentConfig(
model="Llama3.2-3B-Instruct",
model="meta-llama/Llama-3.2-3B-Instruct",
instructions="You are a helpful assistant",
enable_session_persistence=False,
toolgroups=[
{
"name": "builtin::rag",
@ -105,10 +108,10 @@ response = agent.create_turn(
{"role": "user", "content": "I am providing some documents for reference."}
],
documents=[
dict(
content="https://raw.githubusercontent.com/example/doc.rst",
mime_type="text/plain",
)
{
"content": "https://raw.githubusercontent.com/example/doc.rst",
"mime_type": "text/plain",
}
],
session_id=session_id,
)

View file

@ -1,7 +1,7 @@
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Dell Distribution of Llama Stack

View file

@ -1,7 +1,7 @@
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Fireworks Distribution
```{toctree}

View file

@ -1,7 +1,7 @@
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Meta Reference Distribution
```{toctree}

View file

@ -1,7 +1,7 @@
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Meta Reference Quantized Distribution
```{toctree}

View file

@ -1,7 +1,7 @@
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Ollama Distribution
```{toctree}

View file

@ -1,7 +1,7 @@
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Remote vLLM Distribution
```{toctree}
:maxdepth: 2

View file

@ -1,7 +1,7 @@
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# SambaNova Distribution
```{toctree}

View file

@ -1,7 +1,7 @@
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# TGI Distribution

View file

@ -1,7 +1,7 @@
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Together Distribution
```{toctree}

View file

@ -2,7 +2,7 @@
```{admonition} News
:class: tip
Llama Stack 0.1.1 is now available! See the [release notes](https://github.com/meta-llama/llama-stack/releases/tag/v0.1.1) for more details.
Llama Stack 0.1.2 is now available! See the [release notes](https://github.com/meta-llama/llama-stack/releases/tag/v0.1.2) for more details.
```
# Llama Stack

View file

@ -22,9 +22,9 @@ class StackListProviders(Subcommand):
self.parser.set_defaults(func=self._run_providers_list_cmd)
def _add_arguments(self):
from llama_stack.distribution.datatypes import Api
from llama_stack.distribution.distribution import providable_apis
api_values = [a.value for a in Api]
api_values = [api.value for api in providable_apis()]
self.parser.add_argument(
"api",
type=str,

View file

@ -55,6 +55,16 @@ class StackRun(Subcommand):
default=[],
metavar="KEY=VALUE",
)
self.parser.add_argument(
"--tls-keyfile",
type=str,
help="Path to TLS key file for HTTPS",
)
self.parser.add_argument(
"--tls-certfile",
type=str,
help="Path to TLS certificate file for HTTPS",
)
def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
import importlib.resources
@ -178,4 +188,7 @@ class StackRun(Subcommand):
return
run_args.extend(["--env", f"{key}={value}"])
if args.tls_keyfile and args.tls_certfile:
run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile])
run_with_pty(run_args)

View file

@ -117,6 +117,23 @@ class Provider(BaseModel):
config: Dict[str, Any]
class ServerConfig(BaseModel):
port: int = Field(
default=8321,
description="Port to listen on",
ge=1024,
le=65535,
)
tls_certfile: Optional[str] = Field(
default=None,
description="Path to TLS certificate file for HTTPS",
)
tls_keyfile: Optional[str] = Field(
default=None,
description="Path to TLS key file for HTTPS",
)
class StackRunConfig(BaseModel):
version: str = LLAMA_STACK_RUN_CONFIG_VERSION
@ -159,6 +176,11 @@ a default SQLite store will be used.""",
eval_tasks: List[EvalTaskInput] = Field(default_factory=list)
tool_groups: List[ToolGroupInput] = Field(default_factory=list)
server: ServerConfig = Field(
default_factory=ServerConfig,
description="Configuration for the HTTP(S) server",
)
class BuildConfig(BaseModel):
version: str = LLAMA_STACK_BUILD_CONFIG_VERSION
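As a standalone sanity check, here is a sketch that mirrors the fields above with pydantic v2 (for illustration only, not the actual stack code) and shows how the new `server` block resolves:

```python
from typing import Optional
from pydantic import BaseModel, Field

# Mirror of the ServerConfig fields shown above, for illustration only.
class ServerConfig(BaseModel):
    port: int = Field(default=8321, ge=1024, le=65535)
    tls_certfile: Optional[str] = None
    tls_keyfile: Optional[str] = None

# Omitting the block entirely falls back to plain HTTP on port 8321.
print(ServerConfig())
# Supplying both TLS paths is what later lets the server enable HTTPS.
print(ServerConfig.model_validate({"port": 8443, "tls_certfile": "cert.pem", "tls_keyfile": "key.pem"}))
```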

View file

@ -17,17 +17,6 @@ from typing import Any, get_args, get_origin, Optional, TypeVar
import httpx
import yaml
from llama_stack_client import (
APIResponse,
AsyncAPIResponse,
AsyncLlamaStackClient,
AsyncStream,
LlamaStackClient,
NOT_GIVEN,
)
from pydantic import BaseModel, TypeAdapter
from rich.console import Console
from termcolor import cprint
from llama_stack.distribution.build import print_pip_install_help
from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
@ -46,6 +35,17 @@ from llama_stack.providers.utils.telemetry.tracing import (
setup_logger,
start_trace,
)
from llama_stack_client import (
APIResponse,
AsyncAPIResponse,
AsyncLlamaStackClient,
AsyncStream,
LlamaStackClient,
NOT_GIVEN,
)
from pydantic import BaseModel, TypeAdapter
from rich.console import Console
from termcolor import cprint
T = TypeVar("T")
@ -198,6 +198,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
async def initialize(self) -> bool:
try:
self.endpoint_impls = None
self.impls = await construct_stack(self.config, self.custom_provider_registry)
except ModuleNotFoundError as _e:
cprint(_e.msg, "red")
@ -213,7 +214,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
f"Please run:\n\n{prefix}llama stack build --template {self.config_path_or_template_name} --image-type venv\n\n",
"yellow",
)
return False
raise _e
if Api.telemetry in self.impls:
setup_logger(self.impls[Api.telemetry])
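Because `return False` is now `raise _e`, callers see the underlying failure instead of a silent boolean; a minimal caller-side sketch (hypothetical, assuming the class is importable from `llama_stack.distribution.library_client`):

```python
import asyncio

# Hypothetical import path, shown for illustration only.
from llama_stack.distribution.library_client import AsyncLlamaStackAsLibraryClient


async def main():
    client = AsyncLlamaStackAsLibraryClient("ollama")  # template name, as in the hint above
    try:
        await client.initialize()
    except ModuleNotFoundError:
        # Provider dependencies are missing; follow the printed
        # `llama stack build --template ollama --image-type venv` hint and retry.
        raise


asyncio.run(main())
```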

View file

@ -282,8 +282,19 @@ def main():
action="append",
help="Environment variables in KEY=value format. Can be specified multiple times.",
)
parser.add_argument(
"--tls-keyfile",
help="Path to TLS key file for HTTPS",
required="--tls-certfile" in sys.argv,
)
parser.add_argument(
"--tls-certfile",
help="Path to TLS certificate file for HTTPS",
required="--tls-keyfile" in sys.argv,
)
args = parser.parse_args()
if args.env:
for env_pair in args.env:
try:
@ -381,11 +392,36 @@ def main():
import uvicorn
# FYI this does not do hot-reloads
# Configure SSL if certificates are provided
port = args.port or config.server.port
ssl_config = None
if args.tls_keyfile:
keyfile = args.tls_keyfile
certfile = args.tls_certfile
else:
keyfile = config.server.tls_keyfile
certfile = config.server.tls_certfile
if keyfile and certfile:
ssl_config = {
"ssl_keyfile": keyfile,
"ssl_certfile": certfile,
}
print(f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}")
listen_host = ["::", "0.0.0.0"] if not args.disable_ipv6 else "0.0.0.0"
print(f"Listening on {listen_host}:{args.port}")
uvicorn.run(app, host=listen_host, port=args.port)
print(f"Listening on {listen_host}:{port}")
uvicorn_config = {
"app": app,
"host": listen_host,
"port": port,
}
if ssl_config:
uvicorn_config.update(ssl_config)
uvicorn.run(**uvicorn_config)
def extract_path_params(route: str) -> List[str]:

View file

@ -34,6 +34,7 @@ shift
# Process environment variables from --env arguments
env_vars=""
other_args=""
while [[ $# -gt 0 ]]; do
case "$1" in
--env)
@ -48,6 +49,7 @@ while [[ $# -gt 0 ]]; do
fi
;;
*)
other_args="$other_args $1"
shift
;;
esac
@ -61,4 +63,5 @@ $CONDA_PREFIX/bin/python \
-m llama_stack.distribution.server.server \
--yaml-config "$yaml_config" \
--port "$port" \
$env_vars
$env_vars \
$other_args

View file

@ -40,8 +40,12 @@ shift
port="$1"
shift
# Initialize other_args
other_args=""
# Process environment variables from --env arguments
env_vars=""
while [[ $# -gt 0 ]]; do
case "$1" in
--env)
@ -55,6 +59,7 @@ while [[ $# -gt 0 ]]; do
fi
;;
*)
other_args="$other_args $1"
shift
;;
esac
@ -93,5 +98,8 @@ $CONTAINER_BINARY run $CONTAINER_OPTS -it \
-v "$yaml_config:/app/config.yaml" \
$mounts \
--env LLAMA_STACK_PORT=$port \
--entrypoint='["python", "-m", "llama_stack.distribution.server.server", "--yaml-config", "/app/config.yaml"]' \
$container_image:$version_tag
--entrypoint python \
$container_image:$version_tag \
-m llama_stack.distribution.server.server \
--yaml-config /app/config.yaml \
$other_args

View file

@ -67,7 +67,6 @@ def generate_bwrap_command(bind_dirs: List[str]) -> str:
@dataclass
class CodeExecutionContext:
matplotlib_dump_dir: str
use_proxy: bool = False
@dataclass

View file

@ -26,6 +26,7 @@ from llama_stack.apis.inference import (
Message,
ResponseFormat,
ToolChoice,
ToolConfig,
)
from llama_stack.distribution.request_headers import NeedsRequestProviderData
from llama_stack.providers.remote.inference.groq.config import GroqConfig

View file

@ -352,24 +352,20 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
return EmbeddingsResponse(embeddings=embeddings)
async def register_model(self, model: Model) -> Model:
# ollama does not have embedding models running. Check if the model is in list of available models.
if model.model_type == ModelType.embedding:
response = await self.client.list()
async def check_model_availability(model_id: str):
response = await self.client.ps()
available_models = [m["model"] for m in response["models"]]
if model.provider_resource_id not in available_models:
if model_id not in available_models:
raise ValueError(
f"Model '{model.provider_resource_id}' is not available in Ollama. "
f"Available models: {', '.join(available_models)}"
f"Model '{model_id}' is not available in Ollama. Available models: {', '.join(available_models)}"
)
if model.model_type == ModelType.embedding:
await check_model_availability(model.provider_resource_id)
return model
model = await self.register_helper.register_model(model)
models = await self.client.ps()
available_models = [m["model"] for m in models["models"]]
if model.provider_resource_id not in available_models:
raise ValueError(
f"Model '{model.provider_resource_id}' is not available in Ollama. "
f"Available models: {', '.join(available_models)}"
)
await check_model_availability(model.provider_resource_id)
return model

View file

@ -12,8 +12,8 @@ from .config import QdrantConfig
async def get_adapter_impl(config: QdrantConfig, deps: Dict[Api, ProviderSpec]):
from .qdrant import QdrantVectorMemoryAdapter
from .qdrant import QdrantVectorDBAdapter
impl = QdrantVectorMemoryAdapter(config, deps[Api.inference])
impl = QdrantVectorDBAdapter(config, deps[Api.inference])
await impl.initialize()
return impl

View file

@ -55,7 +55,7 @@ class QdrantIndex(EmbeddingIndex):
points = []
for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
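# the source document id now lives in the chunk's metadata rather than as a top-level attribute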
chunk_id = f"{chunk.document_id}:chunk-{i}"
chunk_id = f"{chunk.metadata['document_id']}:chunk-{i}"
points.append(
PointStruct(
id=convert_id(chunk_id),
@ -93,6 +93,9 @@ class QdrantIndex(EmbeddingIndex):
return QueryChunksResponse(chunks=chunks, scores=scores)
async def delete(self):
await self.client.delete_collection(collection_name=self.collection_name)
class QdrantVectorDBAdapter(VectorIO, VectorDBsProtocolPrivate):
def __init__(self, config: QdrantConfig, inference_api: Api.inference) -> None:

View file

@ -95,7 +95,7 @@ class TestDatasetIO:
assert len(response) == 1
assert response[0].identifier == "test_dataset"
with pytest.raises(Exception) as exc_info:
with pytest.raises(ValueError):
# unregister a dataset that does not exist
await datasets_impl.unregister_dataset("test_dataset2")
@ -104,7 +104,7 @@ class TestDatasetIO:
assert isinstance(response, list)
assert len(response) == 0
with pytest.raises(Exception) as exc_info:
with pytest.raises(ValueError):
await datasets_impl.unregister_dataset("test_dataset")
@pytest.mark.asyncio

View file

@ -32,7 +32,7 @@ class TestModelRegistration:
)
# Try to register a model that's too large for local inference
with pytest.raises(ValueError) as exc_info:
with pytest.raises(ValueError):
await models_impl.register_model(
model_id="Llama3.1-70B-Instruct",
)
@ -42,7 +42,7 @@ class TestModelRegistration:
_, models_impl = inference_stack
# Try to register a non-existent model
with pytest.raises(Exception) as exc_info:
with pytest.raises(ValueError):
await models_impl.register_model(
model_id="Llama3-NonExistent-Model",
)
@ -59,7 +59,7 @@ class TestModelRegistration:
},
)
with pytest.raises(ValueError) as exc_info:
with pytest.raises(ValueError):
await models_impl.register_model(
model_id="custom-model-2",
metadata={
@ -88,7 +88,7 @@ class TestModelRegistration:
async def test_register_with_invalid_llama_model(self, inference_stack):
_, models_impl = inference_stack
with pytest.raises(ValueError) as exc_info:
with pytest.raises(ValueError):
await models_impl.register_model(
model_id="custom-model-2",
metadata={"llama_model": "invalid-llama-model"},

View file

@ -4,12 +4,12 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import base64
from pathlib import Path
import pytest
from llama_stack.apis.common.content_types import ImageContentItem, TextContentItem, URL
from llama_stack.apis.common.content_types import URL, ImageContentItem, TextContentItem
from llama_stack.apis.inference import (
ChatCompletionResponse,
ChatCompletionResponseEventType,
@ -23,7 +23,7 @@ from .utils import group_chunks
THIS_DIR = Path(__file__).parent
with open(THIS_DIR / "pasta.jpeg", "rb") as f:
PASTA_IMAGE = f.read()
PASTA_IMAGE = base64.b64encode(f.read()).decode("utf-8")
class TestVisionModelInference:

View file

@ -29,7 +29,7 @@ def find_template_dirs(templates_dir: Path) -> Iterator[Path]:
if not templates_dir.exists():
raise FileNotFoundError(f"Templates directory not found: {templates_dir}")
return (d for d in templates_dir.iterdir() if d.is_dir() and d.name != "__pycache__")
return sorted(d for d in templates_dir.iterdir() if d.is_dir() and d.name != "__pycache__")
def process_template(template_dir: Path, progress) -> None:

View file

@ -115,3 +115,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -117,3 +117,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -116,3 +116,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -107,3 +107,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -172,3 +172,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -161,3 +161,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -124,3 +124,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -114,3 +114,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -124,3 +124,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -114,3 +114,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -126,3 +126,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -115,3 +115,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -117,3 +117,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -147,3 +147,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -16,7 +16,7 @@ The `llamastack/distribution-{{ name }}` distribution consists of the following
You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still use this distribution since Ollama supports GPU acceleration.
{%- if run_config_env_vars %}
{% if run_config_env_vars %}
### Environment Variables
The following environment variables can be configured:

View file

@ -121,3 +121,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -110,3 +110,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -126,3 +126,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -115,3 +115,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -126,3 +126,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -131,8 +131,15 @@ class DistributionTemplate(BaseModel):
providers_str = ", ".join(f"`{p}`" for p in providers)
providers_table += f"| {api} | {providers_str} |\n"
template = "<!-- This file was auto-generated by distro_codegen.py, please edit source -->\n"
template += self.template_path.read_text()
template = self.template_path.read_text()
comment = "<!-- This file was auto-generated by distro_codegen.py, please edit source -->\n"
orphantext = "---\norphan: true\n---\n"
if template.startswith(orphantext):
template = template.replace(orphantext, orphantext + comment)
else:
template = comment + template
# Render template with rich-generated table
env = jinja2.Environment(
trim_blocks=True,

View file

@ -114,3 +114,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -113,3 +113,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -167,3 +167,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -156,3 +156,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -117,3 +117,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "llama_stack"
version = "0.1.1"
version = "0.1.2"
authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
description = "Llama Stack"
readme = "README.md"
@ -25,8 +25,8 @@ dependencies = [
"fire",
"httpx",
"huggingface-hub",
"llama-models>=0.1.1",
"llama-stack-client>=0.1.1",
"llama-models>=0.1.2",
"llama-stack-client>=0.1.2",
"prompt-toolkit",
"python-dotenv",
"pydantic>=2",

View file

@ -4,6 +4,7 @@ annotated-types==0.7.0
anyio==4.8.0
blobfile==3.0.0
certifi==2025.1.31
chardet==5.2.0
charset-normalizer==3.4.1
click==8.1.8
colorama==0.4.6 ; sys_platform == 'win32'
@ -18,8 +19,8 @@ httpx==0.28.1
huggingface-hub==0.28.1
idna==3.10
jinja2==3.1.5
llama-models==0.1.1
llama-stack-client==0.1.1
llama-models==0.1.2
llama-stack-client==0.1.2
lxml==5.3.0
markdown-it-py==3.0.0
markupsafe==3.0.2
@ -34,6 +35,7 @@ pycryptodomex==3.21.0
pydantic==2.10.6
pydantic-core==2.27.2
pygments==2.19.1
pypdf==5.2.0
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
pytz==2025.1

View file

@ -4,18 +4,18 @@ You can run llama stack integration tests on either a Llama Stack Library or a Llama Stack endpoint.
To test on a Llama Stack library with a certain configuration, run
```bash
LLAMA_STACK_CONFIG=./llama_stack/templates/cerebras/run.yaml
pytest -s -v tests/client-sdk/inference/test_inference.py
pytest -s -v tests/client-sdk/inference/
```
or just the template name
```bash
LLAMA_STACK_CONFIG=together
pytest -s -v tests/client-sdk/inference/test_inference.py
pytest -s -v tests/client-sdk/inference/
```
To test on a Llama Stack endpoint, run
```bash
LLAMA_STACK_BASE_URL=http://localhost:8089
pytest -s -v tests/client-sdk/inference/test_inference.py
pytest -s -v tests/client-sdk/inference
```
## Report Generation

View file

@ -263,12 +263,14 @@ def test_custom_tool(llama_stack_client, agent_config):
assert "CustomTool" in logs_str
def test_override_system_message_behavior(llama_stack_client, agent_config):
# TODO: fix this flaky test
def xtest_override_system_message_behavior(llama_stack_client, agent_config):
client_tool = TestClientTool()
agent_config = {
**agent_config,
"instructions": "You are a pirate",
"client_tools": [client_tool.get_tool_definition()],
"model": "meta-llama/Llama-3.2-3B-Instruct",
}
agent = Agent(llama_stack_client, agent_config, client_tools=(client_tool,))

View file

@ -4,9 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import base64
import pathlib
import pytest
from pydantic import BaseModel
@ -14,6 +11,7 @@ PROVIDER_TOOL_PROMPT_FORMAT = {
"remote::ollama": "json",
"remote::together": "json",
"remote::fireworks": "json",
"remote::vllm": "json",
}
PROVIDER_LOGPROBS_TOP_K = set(
@ -56,23 +54,6 @@ def get_weather_tool_definition():
}
@pytest.fixture
def image_path():
return pathlib.Path(__file__).parent / "dog.png"
@pytest.fixture
def base64_image_data(image_path):
# Convert the image to base64
return base64.b64encode(image_path.read_bytes()).decode("utf-8")
@pytest.fixture
def base64_image_url(base64_image_data, image_path):
# suffix includes the ., so we remove it
return f"data:image/{image_path.suffix[1:]};base64,{base64_image_data}"
def test_text_completion_non_streaming(llama_stack_client, text_model_id):
response = llama_stack_client.inference.completion(
content="Complete the sentence using one word: Roses are red, violets are ",
@ -176,8 +157,8 @@ def test_text_completion_structured_output(llama_stack_client, text_model_id, in
@pytest.mark.parametrize(
"question,expected",
[
("What are the names of planets in our solar system?", "Earth"),
("What are the names of the planets that have rings around them?", "Saturn"),
("Which planet do humans live on?", "Earth"),
("Which planet has rings around it with a name starting with letter S?", "Saturn"),
],
)
def test_text_chat_completion_non_streaming(llama_stack_client, text_model_id, question, expected):
@ -299,101 +280,3 @@ def test_text_chat_completion_structured_output(llama_stack_client, text_model_i
assert answer.last_name == "Jordan"
assert answer.year_of_birth == 1963
assert answer.num_seasons_in_nba == 15
def test_image_chat_completion_non_streaming(llama_stack_client, vision_model_id):
message = {
"role": "user",
"content": [
{
"type": "image",
"image": {
"url": {
# TODO: Replace with Github based URI to resources/sample1.jpg
"uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
},
},
},
{
"type": "text",
"text": "Describe what is in this image.",
},
],
}
response = llama_stack_client.inference.chat_completion(
model_id=vision_model_id,
messages=[message],
stream=False,
)
message_content = response.completion_message.content.lower().strip()
assert len(message_content) > 0
assert any(expected in message_content for expected in {"dog", "puppy", "pup"})
def test_image_chat_completion_streaming(llama_stack_client, vision_model_id):
message = {
"role": "user",
"content": [
{
"type": "image",
"image": {
"url": {
# TODO: Replace with Github based URI to resources/sample1.jpg
"uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
},
},
},
{
"type": "text",
"text": "Describe what is in this image.",
},
],
}
response = llama_stack_client.inference.chat_completion(
model_id=vision_model_id,
messages=[message],
stream=True,
)
streamed_content = ""
for chunk in response:
streamed_content += chunk.event.delta.text.lower()
assert len(streamed_content) > 0
assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})
@pytest.mark.parametrize("type_", ["url", "data"])
def test_image_chat_completion_base64(llama_stack_client, vision_model_id, base64_image_data, base64_image_url, type_):
image_spec = {
"url": {
"type": "image",
"image": {
"url": {
"uri": base64_image_url,
},
},
},
"data": {
"type": "image",
"image": {
"data": base64_image_data,
},
},
}[type_]
message = {
"role": "user",
"content": [
image_spec,
{
"type": "text",
"text": "Describe what is in this image.",
},
],
}
response = llama_stack_client.inference.chat_completion(
model_id=vision_model_id,
messages=[message],
stream=False,
)
message_content = response.completion_message.content.lower().strip()
assert len(message_content) > 0

View file

@ -0,0 +1,133 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import base64
import pathlib
import pytest
@pytest.fixture(scope="session")
def inference_provider_type(llama_stack_client):
providers = llama_stack_client.providers.list()
inference_providers = [p for p in providers if p.api == "inference"]
assert len(inference_providers) > 0, "No inference providers found"
return inference_providers[0].provider_type
@pytest.fixture
def image_path():
return pathlib.Path(__file__).parent / "dog.png"
@pytest.fixture
def base64_image_data(image_path):
# Convert the image to base64
return base64.b64encode(image_path.read_bytes()).decode("utf-8")
@pytest.fixture
def base64_image_url(base64_image_data, image_path):
# suffix includes the ., so we remove it
return f"data:image/{image_path.suffix[1:]};base64,{base64_image_data}"
def test_image_chat_completion_non_streaming(llama_stack_client, vision_model_id):
message = {
"role": "user",
"content": [
{
"type": "image",
"image": {
"url": {
# TODO: Replace with Github based URI to resources/sample1.jpg
"uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
},
},
},
{
"type": "text",
"text": "Describe what is in this image.",
},
],
}
response = llama_stack_client.inference.chat_completion(
model_id=vision_model_id,
messages=[message],
stream=False,
)
message_content = response.completion_message.content.lower().strip()
assert len(message_content) > 0
assert any(expected in message_content for expected in {"dog", "puppy", "pup"})
def test_image_chat_completion_streaming(llama_stack_client, vision_model_id):
message = {
"role": "user",
"content": [
{
"type": "image",
"image": {
"url": {
# TODO: Replace with Github based URI to resources/sample1.jpg
"uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
},
},
},
{
"type": "text",
"text": "Describe what is in this image.",
},
],
}
response = llama_stack_client.inference.chat_completion(
model_id=vision_model_id,
messages=[message],
stream=True,
)
streamed_content = ""
for chunk in response:
streamed_content += chunk.event.delta.text.lower()
assert len(streamed_content) > 0
assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})
@pytest.mark.parametrize("type_", ["url", "data"])
def test_image_chat_completion_base64(llama_stack_client, vision_model_id, base64_image_data, base64_image_url, type_):
image_spec = {
"url": {
"type": "image",
"image": {
"url": {
"uri": base64_image_url,
},
},
},
"data": {
"type": "image",
"image": {
"data": base64_image_data,
},
},
}[type_]
message = {
"role": "user",
"content": [
image_spec,
{
"type": "text",
"text": "Describe what is in this image.",
},
],
}
response = llama_stack_client.inference.chat_completion(
model_id=vision_model_id,
messages=[message],
stream=False,
)
message_content = response.completion_message.content.lower().strip()
assert len(message_content) > 0

uv.lock (generated, 18 changed lines)
View file

@ -687,7 +687,7 @@ wheels = [
[[package]]
name = "llama-models"
version = "0.1.1"
version = "0.1.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "jinja2" },
@ -696,14 +696,14 @@ dependencies = [
{ name = "pyyaml" },
{ name = "tiktoken" },
]
sdist = { url = "https://files.pythonhosted.org/packages/df/80/4a4595cf5e55f71c0e15b85ff2f4c04b0742bf664ede062a09c9d383bf7b/llama_models-0.1.1.tar.gz", hash = "sha256:7cb5a9fe38485b47aff4c93e183d6d390a676a7619f3355502576b652f17733a", size = 1608412 }
sdist = { url = "https://files.pythonhosted.org/packages/b5/f2/ed8310d4677cd38ab45ffba45aea2a4e9882b640045ad9c3198ac69e5a85/llama_models-0.1.2.tar.gz", hash = "sha256:1266eaec7a8db336e4ed034d2b494189ccb7fd6d6b7aefe874eee749a4340b9b", size = 1608069 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d9/93/d49dd0f0cd37df1a7a7fb25444d010f626cdf42b21eea11d839b0f6a808a/llama_models-0.1.1-py3-none-any.whl", hash = "sha256:7e4f15dc4f6f011852ea2c42f9770b75140f5eca670b32cc67fc0a4605c55f89", size = 1650981 },
{ url = "https://files.pythonhosted.org/packages/55/a7/34b9e88ef4109759c8881f43b8006139e3d13d54c440b8c571b253655f54/llama_models-0.1.2-py3-none-any.whl", hash = "sha256:8aa5287d1c6325698991ff677e71148cac347e07493bb5b3ab891e614b89e1f8", size = 1651273 },
]
[[package]]
name = "llama-stack"
version = "0.1.1"
version = "0.1.2"
source = { editable = "." }
dependencies = [
{ name = "blobfile" },
@ -751,8 +751,8 @@ requires-dist = [
{ name = "fire" },
{ name = "httpx" },
{ name = "huggingface-hub" },
{ name = "llama-models", specifier = ">=0.1.1" },
{ name = "llama-stack-client", specifier = ">=0.1.1" },
{ name = "llama-models", specifier = ">=0.1.2" },
{ name = "llama-stack-client", specifier = ">=0.1.2" },
{ name = "myst-parser", marker = "extra == 'docs'" },
{ name = "nbval", marker = "extra == 'dev'" },
{ name = "pre-commit", marker = "extra == 'dev'" },
@ -780,7 +780,7 @@ requires-dist = [
[[package]]
name = "llama-stack-client"
version = "0.1.1"
version = "0.1.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@ -797,9 +797,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/07/42/7004958ac1a6da9a8060decf0d9120fdeb3b2775de090a0a473f2ee4a27d/llama_stack_client-0.1.1.tar.gz", hash = "sha256:3e549a848ade959d342fa52ec49b1913b7bb615a77b5b8dcaefe6ff94409049e", size = 179729 }
sdist = { url = "https://files.pythonhosted.org/packages/9e/75/8b41a3026c871a8650cd8d2cfda9f891a9163458813574f36518bb40afe4/llama_stack_client-0.1.2.tar.gz", hash = "sha256:94277ddae52be557d771dcdc15d85af9012b5aa87439dd69ec1dc0ff486b0c8e", size = 188023 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/80/66/5255c09dc001ff437fd6fe6fad27142035b60073df243f7df0494095f605/llama_stack_client-0.1.1-py3-none-any.whl", hash = "sha256:e07d58fdcc1eaa370dd00b94c2dd1a8169c0ac60c37f6f2772cbc2c5b63f2e62", size = 348665 },
{ url = "https://files.pythonhosted.org/packages/c4/32/3a3a97eecff1f1e3a1dc90e9b00681abea11ec4f43a7ca549981261e18b6/llama_stack_client-0.1.2-py3-none-any.whl", hash = "sha256:85ff0fb57a62d7d0470cfaa2b07a595c9fb3483297944d5e5a066db850d38ccd", size = 359415 },
]
[[package]]