Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-07 02:58:21 +00:00)

Merge branch 'main' into fiddlecube-guard — commit 42d6e7e4a1
69 changed files with 721 additions and 367 deletions
.github/workflows/pre-commit.yml (vendored) — 4 changes

@@ -23,3 +23,7 @@ jobs:
             .pre-commit-config.yaml
       - uses: pre-commit/action@v3.0.1
+
+      - name: Verify if there are any diff files after pre-commit
+        run: |
+          git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)
.github/workflows/tests.yml (vendored) — 2 changes

@@ -54,7 +54,7 @@ jobs:
          echo "REPORT_FILE=${REPORT_OUTPUT}" >> "$GITHUB_ENV"

          export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
-         LLAMA_STACK_CONFIG=./llama_stack/templates/${{ matrix.provider }}/run.yaml pytest --md-report --md-report-verbose=1 ./tests/client-sdk/inference/test_inference.py --md-report-output "$REPORT_OUTPUT"
+         LLAMA_STACK_CONFIG=./llama_stack/templates/${{ matrix.provider }}/run.yaml pytest --md-report --md-report-verbose=1 ./tests/client-sdk/inference/ --md-report-output "$REPORT_OUTPUT"

       - name: Output reports to the job summary
         if: always()
.pre-commit-config.yaml

@@ -48,6 +48,7 @@ repos:
     hooks:
       - id: uv-export
         args: ["--frozen", "--no-hashes", "--no-emit-project"]
+      - id: uv-sync

 # - repo: https://github.com/pre-commit/mirrors-mypy
 #   rev: v1.14.0
CHANGELOG.md — 44 changes (file removed; its former contents were)

@@ -1,44 +0,0 @@
# Changelog

## 0.2.0

### Added

### Changed

### Removed

## 0.0.53

### Added
- Resource-oriented design for models, shields, memory banks, datasets and eval tasks
- Persistence for registered objects with distribution
- Ability to persist memory banks created for FAISS
- PostgreSQL KVStore implementation
- Environment variable placeholder support in run.yaml files
- Comprehensive Zero-to-Hero notebooks and quickstart guides
- Support for quantized models in Ollama
- Vision models support for Together, Fireworks, Meta-Reference, and Ollama, and vLLM
- Bedrock distribution with safety shields support
- Evals API with task registration and scoring functions
- MMLU and SimpleQA benchmark scoring functions
- Huggingface dataset provider integration for benchmarks
- Support for custom dataset registration from local paths
- Benchmark evaluation CLI tools with visualization tables
- RAG evaluation scoring functions and metrics
- Local persistence for datasets and eval tasks

### Changed
- Split safety into distinct providers (llama-guard, prompt-guard, code-scanner)
- Changed provider naming convention (`impls` → `inline`, `adapters` → `remote`)
- Updated API signatures for dataset and eval task registration
- Restructured folder organization for providers
- Enhanced Docker build configuration
- Added version prefixing for REST API routes
- Enhanced evaluation task registration workflow
- Improved benchmark evaluation output formatting
- Restructured evals folder organization for better modularity

### Removed
- `llama stack configure` command
README.md — 32 changes

@@ -34,22 +34,22 @@ By reducing friction and complexity, Llama Stack empowers developers to focus on

 ### API Providers
 Here is a list of the various API providers and available distributions that help developers get started easily.

Every `:heavy_check_mark:` in the provider table is replaced with ✅ (and the alignment row is narrowed); the updated table reads:

| **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** |
|:------------------------:|:----------------------:|:----------:|:-------------:|:----------:|:----------:|:-------------:|
| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ |
| SambaNova | Hosted | | ✅ | | | |
| Cerebras | Hosted | | ✅ | | | |
| Fireworks | Hosted | ✅ | ✅ | ✅ | | |
| AWS Bedrock | Hosted | | ✅ | | ✅ | |
| Together | Hosted | ✅ | ✅ | | ✅ | |
| Groq | Hosted | | ✅ | | | |
| Ollama | Single Node | | ✅ | | | |
| TGI | Hosted and Single Node | | ✅ | | | |
| NVIDIA NIM | Hosted and Single Node | | ✅ | | | |
| Chroma | Single Node | | | ✅ | | |
| PG Vector | Single Node | | | ✅ | | |
| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | |
| vLLM | Hosted and Single Node | | ✅ | | | |

 ### Distributions
@@ -69,6 +69,40 @@
     "fiddlecube": [
         "httpx"
     ],
+    "dell": ["aiohttp", "aiosqlite", "autoevals", "blobfile", "chardet", "chromadb-client",
+             "datasets", "faiss-cpu", "fastapi", "fire", "httpx", "huggingface_hub", "matplotlib",
+             "nltk", "numpy", "openai", "opentelemetry-exporter-otlp-proto-http", "opentelemetry-sdk",
+             "pandas", "pillow", "psycopg2-binary", "pypdf", "redis", "requests", "scikit-learn",
+             "scipy", "sentencepiece", "tqdm", "transformers", "uvicorn",
+             "sentence-transformers --no-deps",
+             "torch torchvision --index-url https://download.pytorch.org/whl/cpu"],
     "fireworks": [
         "aiosqlite",
         "autoevals",
@@ -255,6 +289,38 @@
         "sentence-transformers --no-deps",
         "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
     ],
+    "nvidia": ["aiosqlite", "autoevals", "blobfile", "chardet", "datasets", "faiss-cpu",
+               "fastapi", "fire", "httpx", "matplotlib", "mcp", "nltk", "numpy", "openai",
+               "opentelemetry-exporter-otlp-proto-http", "opentelemetry-sdk", "pandas", "pillow",
+               "psycopg2-binary", "pypdf", "redis", "requests", "scikit-learn", "scipy",
+               "sentencepiece", "tqdm", "transformers", "uvicorn",
+               "sentence-transformers --no-deps",
+               "torch torchvision --index-url https://download.pytorch.org/whl/cpu"],
     "ollama": [
         "aiohttp",
         "aiosqlite",
@@ -322,6 +388,36 @@
         "sentence-transformers --no-deps",
         "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
     ],
+    "sambanova": ["aiosqlite", "blobfile", "chardet", "chromadb-client", "faiss-cpu", "fastapi",
+                  "fire", "httpx", "matplotlib", "nltk", "numpy", "openai",
+                  "opentelemetry-exporter-otlp-proto-http", "opentelemetry-sdk", "pandas", "pillow",
+                  "psycopg2-binary", "pypdf", "redis", "requests", "scikit-learn", "scipy",
+                  "sentencepiece", "tqdm", "transformers", "uvicorn",
+                  "sentence-transformers --no-deps",
+                  "torch torchvision --index-url https://download.pytorch.org/whl/cpu"],
     "tgi": [
         "aiohttp",
         "aiosqlite",
@@ -424,101 +520,5 @@
         "vllm",
         "sentence-transformers --no-deps",
         "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-    ],
-    "nvidia": [ ... ],
-    "sambanova": [ ... ],
-    "dell": [ ...
     ]
 }

(The removed "nvidia", "sambanova", and "dell" entries carried exactly the package lists added earlier in this diff; the change relocates them to their alphabetical positions rather than altering them.)
docs/conftest.py (new file) — 9 lines

@@ -0,0 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+def pytest_collection_modifyitems(items):
+    for item in items:
+        item.name = item.name.replace(' ', '_')
@@ -86,7 +86,6 @@
     "# NBVAL_SKIP\n",
     "\n",
     "!apt-get install -y bubblewrap\n",
-    "# install a branch of llama stack\n",
     "import os\n",
     "os.environ[\"UV_SYSTEM_PYTHON\"] = \"1\"\n",
     "!pip install uv\n",
@@ -3397,6 +3396,231 @@
     "response = client.scoring.score(input_rows=rows, scoring_functions=scoring_params)\n",
     "pprint(response)\n"
    ]
   },

The hunk appends the following new cells to the notebook (cell sources shown decoded rather than as JSON string arrays):

Markdown cell (id ad077440):
## 4. Image Understanding with Llama 3.2
Below is a complete example of using Together's Llama Stack 0.1 server at https://llama-stack.together.ai to ask Llama 3.2 questions about an image.

Markdown cell (id 82e381ec):
### 4.1 Setup and helpers
Below we install the Llama Stack client 0.1, download the example image, define two image helpers, and set Llama Stack Together server URL and Llama 3.2 model name.

Code cell (id 865fc5a8):
!pip install llama-stack-client==0.1.0

Code cell (id 44e05e16):
!wget https://raw.githubusercontent.com/meta-llama/llama-models/refs/heads/main/Llama_Repo.jpeg

Code cell (id 469750f7):
from PIL import Image
import matplotlib.pyplot as plt

def display_image(path):
    img = Image.open(path)
    plt.imshow(img)
    plt.axis('off')
    plt.show()

display_image("Llama_Repo.jpeg")

Code cell (id a2c1e1c2):
import base64

def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        base64_string = base64.b64encode(image_file.read()).decode("utf-8")
        base64_url = f"data:image/png;base64,{base64_string}"
        return base64_url

Code cell (id c565f99e):
from llama_stack_client import LlamaStackClient

LLAMA_STACK_API_TOGETHER_URL="https://llama-stack.together.ai"
LLAMA32_11B_INSTRUCT = "meta-llama/Llama-3.2-11B-Vision-Instruct"

Markdown cell (id 7737cd41):
### 4.2 Using Llama Stack Chat API
The code below uses the Llama Stack 0.1's chat API to interact with Llama 3.2:

Code cell (id d7914894):
from llama_stack_client.lib.inference.event_logger import EventLogger

async def run_main(image_path: str, prompt):
    client = LlamaStackClient(
        base_url=LLAMA_STACK_API_TOGETHER_URL,
    )

    message = {
        "role": "user",
        "content": [
            {
                "type": "image",
                "image": {
                    "url": {
                        "uri": encode_image(image_path)
                    }
                }
            },
            {
                "type": "text",
                "text": prompt,
            }
        ]
    }

    response = client.inference.chat_completion(
        messages=[message],
        model_id=LLAMA32_11B_INSTRUCT,
        stream=False,
    )

    print(response.completion_message.content.lower().strip())

Code cell (id 4ee09b97):
await run_main("Llama_Repo.jpeg",
               "How many different colors are those llamas?\
               What are those colors?")

Markdown cell (id e741d7b9):
### 4.3 Using Llama Stack Agent API
The code below uses the Llama Stack 0.1's Agent API to interact with Llama 3.2:

Code cell (id f9a83275):
from llama_stack_client.lib.agents.agent import Agent
from llama_stack_client.lib.agents.event_logger import EventLogger
from llama_stack_client.types.agent_create_params import AgentConfig

async def run_main(image_path, prompt):
    base64_image = encode_image(image_path)

    client = LlamaStackClient(
        base_url=LLAMA_STACK_API_TOGETHER_URL,
    )

    agent_config = AgentConfig(
        model=LLAMA32_11B_INSTRUCT,
        instructions="You are a helpful assistant",
        enable_session_persistence=False,
    )

    agent = Agent(client, agent_config)
    session_id = agent.create_session("test-session")

    response = agent.create_turn(
        messages=[{
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "image": {
                        "url": {
                            "uri": encode_image(image_path)
                        }
                    }
                },
                {
                    "type": "text",
                    "text": prompt,
                }
            ]
        }],
        session_id=session_id,
    )

    for log in EventLogger().log(response):
        log.print()

Code cell (id 15d0098b):
await run_main("Llama_Repo.jpeg",
               "How many different colors are those llamas?\
               What are those colors?")

Trailing context after the added cells:
 ],
 "metadata": {
@@ -4,7 +4,7 @@ Llama Stack provides all the building blocks needed to create sophisticated AI a

 The best way to get started is to look at this notebook which walks through the various APIs (from basic inference, to RAG agents) and how to use them.

-**Notebook**: [Building AI Applications](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb)
+**Notebook**: [Building AI Applications](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)

 Here are some key topics that will help you build effective agents:
@@ -36,13 +36,12 @@ chunks = [
         "content": "Your document text here",
         "mime_type": "text/plain",
     },
-    ...,
 ]
-client.vector_io.insert(vector_db_id, chunks)
+client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks)

 # You can then query for these chunks
 chunks_response = client.vector_io.query(
-    vector_db_id, query="What do you know about..."
+    vector_db_id=vector_db_id, query="What do you know about..."
 )
 ```
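For reference, a minimal self-contained sketch of the corrected call pattern — the base URL and the vector DB id below are placeholders, and the example assumes a vector DB with that id has already been registered:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

vector_db_id = "my_documents"  # placeholder; assumes this vector DB is already registered

chunks = [
    {
        "content": "Your document text here",
        "mime_type": "text/plain",
        # Assumption: a document id travels in metadata, mirroring the Qdrant change later in this diff.
        "metadata": {"document_id": "doc-1"},
    },
]

# Both calls now use keyword arguments, matching the updated docs above.
client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks)

chunks_response = client.vector_io.query(
    vector_db_id=vector_db_id, query="What do you know about..."
)
print(chunks_response)
```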
@@ -72,8 +71,8 @@ client.tool_runtime.rag_tool.insert(

 # Query documents
 results = client.tool_runtime.rag_tool.query(
-    vector_db_id=vector_db_id,
-    query="What do you know about...",
+    vector_db_ids=[vector_db_id],
+    content="What do you know about...",
 )
 ```
@@ -82,10 +81,14 @@ results = client.tool_runtime.rag_tool.query(

 One of the most powerful patterns is combining agents with RAG capabilities. Here's a complete example:

 ```python
+from llama_stack_client.types.agent_create_params import AgentConfig
+from llama_stack_client.lib.agents.agent import Agent
+
 # Configure agent with memory
 agent_config = AgentConfig(
-    model="Llama3.2-3B-Instruct",
+    model="meta-llama/Llama-3.2-3B-Instruct",
     instructions="You are a helpful assistant",
+    enable_session_persistence=False,
     toolgroups=[
         {
             "name": "builtin::rag",

@@ -105,10 +108,10 @@ response = agent.create_turn(
         {"role": "user", "content": "I am providing some documents for reference."}
     ],
     documents=[
-        dict(
-            content="https://raw.githubusercontent.com/example/doc.rst",
-            mime_type="text/plain",
-        )
+        {
+            "content": "https://raw.githubusercontent.com/example/doc.rst",
+            "mime_type": "text/plain",
+        }
     ],
     session_id=session_id,
 )
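The lines between these two hunks are unchanged and not shown here. As an illustrative bridge only (the names below are not taken from the file), the agent and session are presumably created between the config and the `create_turn` call, following the same pattern as the agent examples elsewhere in this diff:

```python
from llama_stack_client.lib.agents.agent import Agent

# Illustrative only: construct the agent from the config above and open a session
# before the create_turn(...) call shown in the second hunk.
agent = Agent(client, agent_config)
session_id = agent.create_session("rag-session")  # placeholder session name
```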
The same change is applied to each of the auto-generated distribution docs — Dell, Fireworks, Meta Reference, Meta Reference Quantized, Ollama, Remote vLLM, SambaNova, TGI, and Together — moving the codegen comment below the orphan front matter. Representative hunk (from the Dell page; the other eight are identical apart from the page title):

@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->

 # Dell Distribution of Llama Stack
@@ -2,7 +2,7 @@
 ```{admonition} News
 :class: tip

-Llama Stack 0.1.1 is now available! See the [release notes](https://github.com/meta-llama/llama-stack/releases/tag/v0.1.1) for more details.
+Llama Stack 0.1.2 is now available! See the [release notes](https://github.com/meta-llama/llama-stack/releases/tag/v0.1.2) for more details.
 ```

 # Llama Stack
@@ -22,9 +22,9 @@ class StackListProviders(Subcommand):
         self.parser.set_defaults(func=self._run_providers_list_cmd)

     def _add_arguments(self):
-        from llama_stack.distribution.datatypes import Api
+        from llama_stack.distribution.distribution import providable_apis

-        api_values = [a.value for a in Api]
+        api_values = [api.value for api in providable_apis()]
         self.parser.add_argument(
             "api",
             type=str,
@@ -55,6 +55,16 @@ class StackRun(Subcommand):
             default=[],
             metavar="KEY=VALUE",
         )
+        self.parser.add_argument(
+            "--tls-keyfile",
+            type=str,
+            help="Path to TLS key file for HTTPS",
+        )
+        self.parser.add_argument(
+            "--tls-certfile",
+            type=str,
+            help="Path to TLS certificate file for HTTPS",
+        )

     def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
         import importlib.resources

@@ -178,4 +188,7 @@ class StackRun(Subcommand):
                 return
             run_args.extend(["--env", f"{key}={value}"])

+        if args.tls_keyfile and args.tls_certfile:
+            run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile])
+
         run_with_pty(run_args)
@@ -117,6 +117,23 @@ class Provider(BaseModel):
     config: Dict[str, Any]


+class ServerConfig(BaseModel):
+    port: int = Field(
+        default=8321,
+        description="Port to listen on",
+        ge=1024,
+        le=65535,
+    )
+    tls_certfile: Optional[str] = Field(
+        default=None,
+        description="Path to TLS certificate file for HTTPS",
+    )
+    tls_keyfile: Optional[str] = Field(
+        default=None,
+        description="Path to TLS key file for HTTPS",
+    )
+
+
 class StackRunConfig(BaseModel):
     version: str = LLAMA_STACK_RUN_CONFIG_VERSION

@@ -159,6 +176,11 @@ a default SQLite store will be used.""",
     eval_tasks: List[EvalTaskInput] = Field(default_factory=list)
     tool_groups: List[ToolGroupInput] = Field(default_factory=list)

+    server: ServerConfig = Field(
+        default_factory=ServerConfig,
+        description="Configuration for the HTTP(S) server",
+    )
+

 class BuildConfig(BaseModel):
     version: str = LLAMA_STACK_BUILD_CONFIG_VERSION
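A small sketch of the new server configuration surface, assuming `ServerConfig` is importable from the same datatypes module (the file name is not shown in this capture); the TLS paths are placeholders, and the run.yaml templates in this change only set `port: 8321`:

```python
from llama_stack.distribution.datatypes import ServerConfig  # assumed import path

server = ServerConfig(
    port=8443,                         # must fall within the 1024-65535 bounds declared above
    tls_certfile="/path/to/cert.pem",  # placeholder path
    tls_keyfile="/path/to/key.pem",    # placeholder path
)
print(server.model_dump())  # pydantic v2 API, consistent with the project's pydantic>=2 requirement
```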
@@ -17,17 +17,6 @@ from typing import Any, get_args, get_origin, Optional, TypeVar

 import httpx
 import yaml
-from llama_stack_client import (
-    APIResponse,
-    AsyncAPIResponse,
-    AsyncLlamaStackClient,
-    AsyncStream,
-    LlamaStackClient,
-    NOT_GIVEN,
-)
-from pydantic import BaseModel, TypeAdapter
-from rich.console import Console
-from termcolor import cprint

 from llama_stack.distribution.build import print_pip_install_help
 from llama_stack.distribution.configure import parse_and_maybe_upgrade_config

@@ -46,6 +35,17 @@ from llama_stack.providers.utils.telemetry.tracing import (
     setup_logger,
     start_trace,
 )
+from llama_stack_client import (
+    APIResponse,
+    AsyncAPIResponse,
+    AsyncLlamaStackClient,
+    AsyncStream,
+    LlamaStackClient,
+    NOT_GIVEN,
+)
+from pydantic import BaseModel, TypeAdapter
+from rich.console import Console
+from termcolor import cprint

 T = TypeVar("T")
@@ -198,6 +198,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):

     async def initialize(self) -> bool:
         try:
+            self.endpoint_impls = None
             self.impls = await construct_stack(self.config, self.custom_provider_registry)
         except ModuleNotFoundError as _e:
             cprint(_e.msg, "red")

@@ -213,7 +214,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
                 f"Please run:\n\n{prefix}llama stack build --template {self.config_path_or_template_name} --image-type venv\n\n",
                 "yellow",
             )
-            return False
+            raise _e

         if Api.telemetry in self.impls:
             setup_logger(self.impls[Api.telemetry])
@@ -282,8 +282,19 @@ def main():
         action="append",
         help="Environment variables in KEY=value format. Can be specified multiple times.",
     )
+    parser.add_argument(
+        "--tls-keyfile",
+        help="Path to TLS key file for HTTPS",
+        required="--tls-certfile" in sys.argv,
+    )
+    parser.add_argument(
+        "--tls-certfile",
+        help="Path to TLS certificate file for HTTPS",
+        required="--tls-keyfile" in sys.argv,
+    )

     args = parser.parse_args()

     if args.env:
         for env_pair in args.env:
             try:
@@ -381,11 +392,36 @@

     import uvicorn

-    # FYI this does not do hot-reloads
+    # Configure SSL if certificates are provided
+    port = args.port or config.server.port
+
+    ssl_config = None
+    if args.tls_keyfile:
+        keyfile = args.tls_keyfile
+        certfile = args.tls_certfile
+    else:
+        keyfile = config.server.tls_keyfile
+        certfile = config.server.tls_certfile
+
+    if keyfile and certfile:
+        ssl_config = {
+            "ssl_keyfile": keyfile,
+            "ssl_certfile": certfile,
+        }
+        print(f"HTTPS enabled with certificates:\n Key: {keyfile}\n Cert: {certfile}")
+
     listen_host = ["::", "0.0.0.0"] if not args.disable_ipv6 else "0.0.0.0"
-    print(f"Listening on {listen_host}:{args.port}")
-    uvicorn.run(app, host=listen_host, port=args.port)
+    print(f"Listening on {listen_host}:{port}")
+
+    uvicorn_config = {
+        "app": app,
+        "host": listen_host,
+        "port": port,
+    }
+    if ssl_config:
+        uvicorn_config.update(ssl_config)
+
+    uvicorn.run(**uvicorn_config)


 def extract_path_params(route: str) -> List[str]:
@@ -34,6 +34,7 @@ shift

 # Process environment variables from --env arguments
 env_vars=""
+other_args=""
 while [[ $# -gt 0 ]]; do
   case "$1" in
     --env)

@@ -48,6 +49,7 @@ while [[ $# -gt 0 ]]; do
       fi
       ;;
     *)
+      other_args="$other_args $1"
       shift
       ;;
   esac

@@ -61,4 +63,5 @@ $CONDA_PREFIX/bin/python \
   -m llama_stack.distribution.server.server \
   --yaml-config "$yaml_config" \
   --port "$port" \
-  $env_vars
+  $env_vars \
+  $other_args
@@ -40,8 +40,12 @@ shift
 port="$1"
 shift

+# Initialize other_args
+other_args=""
+
 # Process environment variables from --env arguments
 env_vars=""

 while [[ $# -gt 0 ]]; do
   case "$1" in
     --env)

@@ -55,6 +59,7 @@ while [[ $# -gt 0 ]]; do
       fi
       ;;
     *)
+      other_args="$other_args $1"
       shift
       ;;
   esac

@@ -93,5 +98,8 @@ $CONTAINER_BINARY run $CONTAINER_OPTS -it \
   -v "$yaml_config:/app/config.yaml" \
   $mounts \
   --env LLAMA_STACK_PORT=$port \
-  --entrypoint='["python", "-m", "llama_stack.distribution.server.server", "--yaml-config", "/app/config.yaml"]' \
-  $container_image:$version_tag
+  --entrypoint python \
+  $container_image:$version_tag \
+  -m llama_stack.distribution.server.server \
+  --yaml-config /app/config.yaml \
+  $other_args
@@ -67,7 +67,6 @@ def generate_bwrap_command(bind_dirs: List[str]) -> str:
 @dataclass
 class CodeExecutionContext:
     matplotlib_dump_dir: str
-    use_proxy: bool = False


 @dataclass
@@ -26,6 +26,7 @@ from llama_stack.apis.inference import (
     Message,
     ResponseFormat,
     ToolChoice,
+    ToolConfig,
 )
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
 from llama_stack.providers.remote.inference.groq.config import GroqConfig
@@ -352,24 +352,20 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
         return EmbeddingsResponse(embeddings=embeddings)

     async def register_model(self, model: Model) -> Model:
-        # ollama does not have embedding models running. Check if the model is in list of available models.
-        if model.model_type == ModelType.embedding:
-            response = await self.client.list()
-            available_models = [m["model"] for m in response["models"]]
-            if model.provider_resource_id not in available_models:
-                raise ValueError(
-                    f"Model '{model.provider_resource_id}' is not available in Ollama. "
-                    f"Available models: {', '.join(available_models)}"
-                )
+        async def check_model_availability(model_id: str):
+            response = await self.client.ps()
+            available_models = [m["model"] for m in response["models"]]
+            if model_id not in available_models:
+                raise ValueError(
+                    f"Model '{model_id}' is not available in Ollama. Available models: {', '.join(available_models)}"
+                )
+
+        if model.model_type == ModelType.embedding:
+            await check_model_availability(model.provider_resource_id)
             return model

         model = await self.register_helper.register_model(model)
-        models = await self.client.ps()
-        available_models = [m["model"] for m in models["models"]]
-        if model.provider_resource_id not in available_models:
-            raise ValueError(
-                f"Model '{model.provider_resource_id}' is not available in Ollama. "
-                f"Available models: {', '.join(available_models)}"
-            )
+        await check_model_availability(model.provider_resource_id)

         return model
@@ -12,8 +12,8 @@ from .config import QdrantConfig


 async def get_adapter_impl(config: QdrantConfig, deps: Dict[Api, ProviderSpec]):
-    from .qdrant import QdrantVectorMemoryAdapter
+    from .qdrant import QdrantVectorDBAdapter

-    impl = QdrantVectorMemoryAdapter(config, deps[Api.inference])
+    impl = QdrantVectorDBAdapter(config, deps[Api.inference])
     await impl.initialize()
     return impl
@@ -55,7 +55,7 @@ class QdrantIndex(EmbeddingIndex):

         points = []
         for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
-            chunk_id = f"{chunk.document_id}:chunk-{i}"
+            chunk_id = f"{chunk.metadata['document_id']}:chunk-{i}"
             points.append(
                 PointStruct(
                     id=convert_id(chunk_id),

@@ -93,6 +93,9 @@ class QdrantIndex(EmbeddingIndex):

         return QueryChunksResponse(chunks=chunks, scores=scores)

+    async def delete(self):
+        await self.client.delete_collection(collection_name=self.collection_name)
+

 class QdrantVectorDBAdapter(VectorIO, VectorDBsProtocolPrivate):
     def __init__(self, config: QdrantConfig, inference_api: Api.inference) -> None:
@@ -95,7 +95,7 @@ class TestDatasetIO:
         assert len(response) == 1
         assert response[0].identifier == "test_dataset"

-        with pytest.raises(Exception) as exc_info:
+        with pytest.raises(ValueError):
             # unregister a dataset that does not exist
             await datasets_impl.unregister_dataset("test_dataset2")

@@ -104,7 +104,7 @@ class TestDatasetIO:
         assert isinstance(response, list)
         assert len(response) == 0

-        with pytest.raises(Exception) as exc_info:
+        with pytest.raises(ValueError):
             await datasets_impl.unregister_dataset("test_dataset")

     @pytest.mark.asyncio
@@ -32,7 +32,7 @@ class TestModelRegistration:
         )

         # Try to register a model that's too large for local inference
-        with pytest.raises(ValueError) as exc_info:
+        with pytest.raises(ValueError):
             await models_impl.register_model(
                 model_id="Llama3.1-70B-Instruct",
             )

@@ -42,7 +42,7 @@ class TestModelRegistration:
         _, models_impl = inference_stack

         # Try to register a non-existent model
-        with pytest.raises(Exception) as exc_info:
+        with pytest.raises(ValueError):
             await models_impl.register_model(
                 model_id="Llama3-NonExistent-Model",
             )

@@ -59,7 +59,7 @@ class TestModelRegistration:
             },
         )

-        with pytest.raises(ValueError) as exc_info:
+        with pytest.raises(ValueError):
             await models_impl.register_model(
                 model_id="custom-model-2",
                 metadata={

@@ -88,7 +88,7 @@ class TestModelRegistration:
     async def test_register_with_invalid_llama_model(self, inference_stack):
         _, models_impl = inference_stack

-        with pytest.raises(ValueError) as exc_info:
+        with pytest.raises(ValueError):
             await models_impl.register_model(
                 model_id="custom-model-2",
                 metadata={"llama_model": "invalid-llama-model"},
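These test changes assert the specific exception type instead of a bare `Exception` and drop the unused `exc_info` binding. A tiny self-contained illustration of why the narrower assertion is preferable (the helper below is hypothetical, not from the codebase):

```python
import pytest


def parse_port(value: str) -> int:
    # Hypothetical helper used only for this illustration.
    port = int(value)  # raises ValueError for non-numeric input
    if not 1024 <= port <= 65535:
        raise ValueError(f"port out of range: {port}")
    return port


def test_parse_port_rejects_bad_input():
    # Asserting ValueError rather than Exception keeps the test from masking unrelated failures.
    with pytest.raises(ValueError):
        parse_port("not-a-number")
```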
@@ -4,12 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import base64
 from pathlib import Path

 import pytest

-from llama_stack.apis.common.content_types import ImageContentItem, TextContentItem, URL
-
+from llama_stack.apis.common.content_types import URL, ImageContentItem, TextContentItem
 from llama_stack.apis.inference import (
     ChatCompletionResponse,
     ChatCompletionResponseEventType,

@@ -23,7 +23,7 @@ from .utils import group_chunks
 THIS_DIR = Path(__file__).parent

 with open(THIS_DIR / "pasta.jpeg", "rb") as f:
-    PASTA_IMAGE = f.read()
+    PASTA_IMAGE = base64.b64encode(f.read()).decode("utf-8")


 class TestVisionModelInference:
@@ -29,7 +29,7 @@ def find_template_dirs(templates_dir: Path) -> Iterator[Path]:
     if not templates_dir.exists():
         raise FileNotFoundError(f"Templates directory not found: {templates_dir}")

-    return (d for d in templates_dir.iterdir() if d.is_dir() and d.name != "__pycache__")
+    return sorted(d for d in templates_dir.iterdir() if d.is_dir() and d.name != "__pycache__")


 def process_template(template_dir: Path, progress) -> None:
This change also appends the same two lines to each of the distribution template run.yaml files. A representative hunk (the remaining run.yaml hunks in this diff are identical apart from line numbers):

@@ -115,3 +115,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321
@@ -16,7 +16,7 @@ The `llamastack/distribution-{{ name }}` distribution consists of the following

 You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.

-{%- if run_config_env_vars %}
+{% if run_config_env_vars %}
 ### Environment Variables

 The following environment variables can be configured:
(Further template run.yaml files receive the identical `server:` / `port: 8321` addition shown above.)
@@ -131,8 +131,15 @@ class DistributionTemplate(BaseModel):
             providers_str = ", ".join(f"`{p}`" for p in providers)
             providers_table += f"| {api} | {providers_str} |\n"

-        template = "<!-- This file was auto-generated by distro_codegen.py, please edit source -->\n"
-        template += self.template_path.read_text()
+        template = self.template_path.read_text()
+        comment = "<!-- This file was auto-generated by distro_codegen.py, please edit source -->\n"
+        orphantext = "---\norphan: true\n---\n"
+
+        if template.startswith(orphantext):
+            template = template.replace(orphantext, orphantext + comment)
+        else:
+            template = comment + template
+
         # Render template with rich-generated table
         env = jinja2.Environment(
             trim_blocks=True,
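The reordering above keeps the auto-generated banner from landing in front of MyST front matter: `---\norphan: true\n---` must stay the very first thing in the file, so the comment is placed right after it when present, and at the top otherwise. A small self-contained sketch of that behaviour, assuming a hypothetical `prepend_comment` helper and a sample markdown string (neither is a name from the repository):

```python
# Illustrative sketch of the logic in the diff above.
comment = "<!-- This file was auto-generated by distro_codegen.py, please edit source -->\n"
orphantext = "---\norphan: true\n---\n"


def prepend_comment(template: str) -> str:
    if template.startswith(orphantext):
        # Keep the front matter first; the banner goes right after it.
        return template.replace(orphantext, orphantext + comment)
    # No front matter: the banner can safely go at the very top.
    return comment + template


print(prepend_comment("---\norphan: true\n---\n# Ollama Distribution\n").splitlines()[:4])
# ['---', 'orphan: true', '---', '<!-- This file was auto-generated by distro_codegen.py, please edit source -->']
```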
@@ -114,3 +114,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
@@ -113,3 +113,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
@@ -167,3 +167,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
@@ -156,3 +156,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
@@ -117,3 +117,5 @@ tool_groups:
   provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
+server:
+  port: 8321
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "llama_stack"
-version = "0.1.1"
+version = "0.1.2"
 authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
 description = "Llama Stack"
 readme = "README.md"
@@ -25,8 +25,8 @@ dependencies = [
     "fire",
     "httpx",
     "huggingface-hub",
-    "llama-models>=0.1.1",
-    "llama-stack-client>=0.1.1",
+    "llama-models>=0.1.2",
+    "llama-stack-client>=0.1.2",
     "prompt-toolkit",
     "python-dotenv",
     "pydantic>=2",
@@ -4,6 +4,7 @@ annotated-types==0.7.0
 anyio==4.8.0
 blobfile==3.0.0
 certifi==2025.1.31
+chardet==5.2.0
 charset-normalizer==3.4.1
 click==8.1.8
 colorama==0.4.6 ; sys_platform == 'win32'
@@ -18,8 +19,8 @@ httpx==0.28.1
 huggingface-hub==0.28.1
 idna==3.10
 jinja2==3.1.5
-llama-models==0.1.1
-llama-stack-client==0.1.1
+llama-models==0.1.2
+llama-stack-client==0.1.2
 lxml==5.3.0
 markdown-it-py==3.0.0
 markupsafe==3.0.2
@@ -34,6 +35,7 @@ pycryptodomex==3.21.0
 pydantic==2.10.6
 pydantic-core==2.27.2
 pygments==2.19.1
+pypdf==5.2.0
 python-dateutil==2.9.0.post0
 python-dotenv==1.0.1
 pytz==2025.1
@@ -4,18 +4,18 @@ You can run llama stack integration tests on either a Llama Stack Library or a L
 To test on a Llama Stack library with certain configuration, run
 ```bash
 LLAMA_STACK_CONFIG=./llama_stack/templates/cerebras/run.yaml
-pytest -s -v tests/client-sdk/inference/test_inference.py
+pytest -s -v tests/client-sdk/inference/
 ```
 or just the template name
 ```bash
 LLAMA_STACK_CONFIG=together
-pytest -s -v tests/client-sdk/inference/test_inference.py
+pytest -s -v tests/client-sdk/inference/
 ```

 To test on a Llama Stack endpoint, run
 ```bash
 LLAMA_STACK_BASE_URL=http//localhost:8089
-pytest -s -v tests/client-sdk/inference/test_inference.py
+pytest -s -v tests/client-sdk/inference
 ```

 ## Report Generation
@@ -263,12 +263,14 @@ def test_custom_tool(llama_stack_client, agent_config):
     assert "CustomTool" in logs_str


-def test_override_system_message_behavior(llama_stack_client, agent_config):
+# TODO: fix this flaky test
+def xtest_override_system_message_behavior(llama_stack_client, agent_config):
     client_tool = TestClientTool()
     agent_config = {
         **agent_config,
         "instructions": "You are a pirate",
         "client_tools": [client_tool.get_tool_definition()],
+        "model": "meta-llama/Llama-3.2-3B-Instruct",
     }

     agent = Agent(llama_stack_client, agent_config, client_tools=(client_tool,))
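The `test_` to `xtest_` rename above is what actually disables the flaky test: by default pytest only collects functions whose names start with `test`, so the renamed function is silently skipped. A minimal illustration, with placeholder bodies that are not the real test:

```python
# Run with: pytest -v <this file>
# Only the first function is collected; the "xtest_" one never runs because
# pytest's default collection pattern only matches names starting with "test".
def test_is_collected():
    assert True


def xtest_is_ignored_by_default_collection():
    raise AssertionError("pytest never executes this")
```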
@@ -4,9 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-import base64
-import pathlib
-
 import pytest
 from pydantic import BaseModel

@@ -14,6 +11,7 @@ PROVIDER_TOOL_PROMPT_FORMAT = {
     "remote::ollama": "json",
     "remote::together": "json",
     "remote::fireworks": "json",
+    "remote::vllm": "json",
 }

 PROVIDER_LOGPROBS_TOP_K = set(
@@ -56,23 +54,6 @@ def get_weather_tool_definition():
     }


-@pytest.fixture
-def image_path():
-    return pathlib.Path(__file__).parent / "dog.png"
-
-
-@pytest.fixture
-def base64_image_data(image_path):
-    # Convert the image to base64
-    return base64.b64encode(image_path.read_bytes()).decode("utf-8")
-
-
-@pytest.fixture
-def base64_image_url(base64_image_data, image_path):
-    # suffix includes the ., so we remove it
-    return f"data:image/{image_path.suffix[1:]};base64,{base64_image_data}"
-
-
 def test_text_completion_non_streaming(llama_stack_client, text_model_id):
     response = llama_stack_client.inference.completion(
         content="Complete the sentence using one word: Roses are red, violets are ",
@@ -176,8 +157,8 @@ def test_text_completion_structured_output(llama_stack_client, text_model_id, in
 @pytest.mark.parametrize(
     "question,expected",
     [
-        ("What are the names of planets in our solar system?", "Earth"),
-        ("What are the names of the planets that have rings around them?", "Saturn"),
+        ("Which planet do humans live on?", "Earth"),
+        ("Which planet has rings around it with a name starting with letter S?", "Saturn"),
     ],
 )
 def test_text_chat_completion_non_streaming(llama_stack_client, text_model_id, question, expected):
@@ -299,101 +280,3 @@ def test_text_chat_completion_structured_output(llama_stack_client, text_model_i
     assert answer.last_name == "Jordan"
     assert answer.year_of_birth == 1963
     assert answer.num_seasons_in_nba == 15
-
-
-def test_image_chat_completion_non_streaming(llama_stack_client, vision_model_id):
-    message = {
-        "role": "user",
-        "content": [
-            {
-                "type": "image",
-                "image": {
-                    "url": {
-                        # TODO: Replace with Github based URI to resources/sample1.jpg
-                        "uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
-                    },
-                },
-            },
-            {
-                "type": "text",
-                "text": "Describe what is in this image.",
-            },
-        ],
-    }
-    response = llama_stack_client.inference.chat_completion(
-        model_id=vision_model_id,
-        messages=[message],
-        stream=False,
-    )
-    message_content = response.completion_message.content.lower().strip()
-    assert len(message_content) > 0
-    assert any(expected in message_content for expected in {"dog", "puppy", "pup"})
-
-
-def test_image_chat_completion_streaming(llama_stack_client, vision_model_id):
-    message = {
-        "role": "user",
-        "content": [
-            {
-                "type": "image",
-                "image": {
-                    "url": {
-                        # TODO: Replace with Github based URI to resources/sample1.jpg
-                        "uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
-                    },
-                },
-            },
-            {
-                "type": "text",
-                "text": "Describe what is in this image.",
-            },
-        ],
-    }
-    response = llama_stack_client.inference.chat_completion(
-        model_id=vision_model_id,
-        messages=[message],
-        stream=True,
-    )
-    streamed_content = ""
-    for chunk in response:
-        streamed_content += chunk.event.delta.text.lower()
-    assert len(streamed_content) > 0
-    assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})
-
-
-@pytest.mark.parametrize("type_", ["url", "data"])
-def test_image_chat_completion_base64(llama_stack_client, vision_model_id, base64_image_data, base64_image_url, type_):
-    image_spec = {
-        "url": {
-            "type": "image",
-            "image": {
-                "url": {
-                    "uri": base64_image_url,
-                },
-            },
-        },
-        "data": {
-            "type": "image",
-            "image": {
-                "data": base64_image_data,
-            },
-        },
-    }[type_]
-
-    message = {
-        "role": "user",
-        "content": [
-            image_spec,
-            {
-                "type": "text",
-                "text": "Describe what is in this image.",
-            },
-        ],
-    }
-    response = llama_stack_client.inference.chat_completion(
-        model_id=vision_model_id,
-        messages=[message],
-        stream=False,
-    )
-    message_content = response.completion_message.content.lower().strip()
-    assert len(message_content) > 0
133 tests/client-sdk/inference/test_vision_inference.py (new file)
@@ -0,0 +1,133 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import base64
+import pathlib
+
+import pytest
+
+
+@pytest.fixture(scope="session")
+def inference_provider_type(llama_stack_client):
+    providers = llama_stack_client.providers.list()
+    inference_providers = [p for p in providers if p.api == "inference"]
+    assert len(inference_providers) > 0, "No inference providers found"
+    return inference_providers[0].provider_type
+
+
+@pytest.fixture
+def image_path():
+    return pathlib.Path(__file__).parent / "dog.png"
+
+
+@pytest.fixture
+def base64_image_data(image_path):
+    # Convert the image to base64
+    return base64.b64encode(image_path.read_bytes()).decode("utf-8")
+
+
+@pytest.fixture
+def base64_image_url(base64_image_data, image_path):
+    # suffix includes the ., so we remove it
+    return f"data:image/{image_path.suffix[1:]};base64,{base64_image_data}"
+
+
+def test_image_chat_completion_non_streaming(llama_stack_client, vision_model_id):
+    message = {
+        "role": "user",
+        "content": [
+            {
+                "type": "image",
+                "image": {
+                    "url": {
+                        # TODO: Replace with Github based URI to resources/sample1.jpg
+                        "uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
+                    },
+                },
+            },
+            {
+                "type": "text",
+                "text": "Describe what is in this image.",
+            },
+        ],
+    }
+    response = llama_stack_client.inference.chat_completion(
+        model_id=vision_model_id,
+        messages=[message],
+        stream=False,
+    )
+    message_content = response.completion_message.content.lower().strip()
+    assert len(message_content) > 0
+    assert any(expected in message_content for expected in {"dog", "puppy", "pup"})
+
+
+def test_image_chat_completion_streaming(llama_stack_client, vision_model_id):
+    message = {
+        "role": "user",
+        "content": [
+            {
+                "type": "image",
+                "image": {
+                    "url": {
+                        # TODO: Replace with Github based URI to resources/sample1.jpg
+                        "uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
+                    },
+                },
+            },
+            {
+                "type": "text",
+                "text": "Describe what is in this image.",
+            },
+        ],
+    }
+    response = llama_stack_client.inference.chat_completion(
+        model_id=vision_model_id,
+        messages=[message],
+        stream=True,
+    )
+    streamed_content = ""
+    for chunk in response:
+        streamed_content += chunk.event.delta.text.lower()
+    assert len(streamed_content) > 0
+    assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})
+
+
+@pytest.mark.parametrize("type_", ["url", "data"])
+def test_image_chat_completion_base64(llama_stack_client, vision_model_id, base64_image_data, base64_image_url, type_):
+    image_spec = {
+        "url": {
+            "type": "image",
+            "image": {
+                "url": {
+                    "uri": base64_image_url,
+                },
+            },
+        },
+        "data": {
+            "type": "image",
+            "image": {
+                "data": base64_image_data,
+            },
+        },
+    }[type_]
+
+    message = {
+        "role": "user",
+        "content": [
+            image_spec,
+            {
+                "type": "text",
+                "text": "Describe what is in this image.",
+            },
+        ],
+    }
+    response = llama_stack_client.inference.chat_completion(
+        model_id=vision_model_id,
+        messages=[message],
+        stream=False,
+    )
+    message_content = response.completion_message.content.lower().strip()
+    assert len(message_content) > 0
18 uv.lock (generated)
@@ -687,7 +687,7 @@ wheels = [

 [[package]]
 name = "llama-models"
-version = "0.1.1"
+version = "0.1.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "jinja2" },
@@ -696,14 +696,14 @@ dependencies = [
     { name = "pyyaml" },
     { name = "tiktoken" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/df/80/4a4595cf5e55f71c0e15b85ff2f4c04b0742bf664ede062a09c9d383bf7b/llama_models-0.1.1.tar.gz", hash = "sha256:7cb5a9fe38485b47aff4c93e183d6d390a676a7619f3355502576b652f17733a", size = 1608412 }
+sdist = { url = "https://files.pythonhosted.org/packages/b5/f2/ed8310d4677cd38ab45ffba45aea2a4e9882b640045ad9c3198ac69e5a85/llama_models-0.1.2.tar.gz", hash = "sha256:1266eaec7a8db336e4ed034d2b494189ccb7fd6d6b7aefe874eee749a4340b9b", size = 1608069 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d9/93/d49dd0f0cd37df1a7a7fb25444d010f626cdf42b21eea11d839b0f6a808a/llama_models-0.1.1-py3-none-any.whl", hash = "sha256:7e4f15dc4f6f011852ea2c42f9770b75140f5eca670b32cc67fc0a4605c55f89", size = 1650981 },
+    { url = "https://files.pythonhosted.org/packages/55/a7/34b9e88ef4109759c8881f43b8006139e3d13d54c440b8c571b253655f54/llama_models-0.1.2-py3-none-any.whl", hash = "sha256:8aa5287d1c6325698991ff677e71148cac347e07493bb5b3ab891e614b89e1f8", size = 1651273 },
 ]

 [[package]]
 name = "llama-stack"
-version = "0.1.1"
+version = "0.1.2"
 source = { editable = "." }
 dependencies = [
     { name = "blobfile" },
@@ -751,8 +751,8 @@ requires-dist = [
     { name = "fire" },
     { name = "httpx" },
     { name = "huggingface-hub" },
-    { name = "llama-models", specifier = ">=0.1.1" },
-    { name = "llama-stack-client", specifier = ">=0.1.1" },
+    { name = "llama-models", specifier = ">=0.1.2" },
+    { name = "llama-stack-client", specifier = ">=0.1.2" },
     { name = "myst-parser", marker = "extra == 'docs'" },
     { name = "nbval", marker = "extra == 'dev'" },
     { name = "pre-commit", marker = "extra == 'dev'" },
@@ -780,7 +780,7 @@ requires-dist = [

 [[package]]
 name = "llama-stack-client"
-version = "0.1.1"
+version = "0.1.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
@@ -797,9 +797,9 @@ dependencies = [
     { name = "tqdm" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/07/42/7004958ac1a6da9a8060decf0d9120fdeb3b2775de090a0a473f2ee4a27d/llama_stack_client-0.1.1.tar.gz", hash = "sha256:3e549a848ade959d342fa52ec49b1913b7bb615a77b5b8dcaefe6ff94409049e", size = 179729 }
+sdist = { url = "https://files.pythonhosted.org/packages/9e/75/8b41a3026c871a8650cd8d2cfda9f891a9163458813574f36518bb40afe4/llama_stack_client-0.1.2.tar.gz", hash = "sha256:94277ddae52be557d771dcdc15d85af9012b5aa87439dd69ec1dc0ff486b0c8e", size = 188023 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/80/66/5255c09dc001ff437fd6fe6fad27142035b60073df243f7df0494095f605/llama_stack_client-0.1.1-py3-none-any.whl", hash = "sha256:e07d58fdcc1eaa370dd00b94c2dd1a8169c0ac60c37f6f2772cbc2c5b63f2e62", size = 348665 },
+    { url = "https://files.pythonhosted.org/packages/c4/32/3a3a97eecff1f1e3a1dc90e9b00681abea11ec4f43a7ca549981261e18b6/llama_stack_client-0.1.2-py3-none-any.whl", hash = "sha256:85ff0fb57a62d7d0470cfaa2b07a595c9fb3483297944d5e5a066db850d38ccd", size = 359415 },
 ]

 [[package]]