Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-07 02:58:21 +00:00)
Merge branch 'main' into fiddlecube-guard

Commit 42d6e7e4a1: 69 changed files with 721 additions and 367 deletions
.github/workflows/pre-commit.yml (vendored, 4 changes)

@@ -23,3 +23,7 @@ jobs:
           .pre-commit-config.yaml

       - uses: pre-commit/action@v3.0.1
+
+      - name: Verify if there are any diff files after pre-commit
+        run: |
+          git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)
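For contributors who hit the new check, the fix it asks for is simply running the hooks locally before pushing; a minimal sketch (the exact hook set comes from `.pre-commit-config.yaml`):

```bash
# Install and run all pre-commit hooks locally, then commit the fixes
pip install pre-commit
pre-commit run --all-files
git add -u && git commit -m "apply pre-commit fixes"
```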
.github/workflows/tests.yml (vendored, 2 changes)

@@ -54,7 +54,7 @@ jobs:
           echo "REPORT_FILE=${REPORT_OUTPUT}" >> "$GITHUB_ENV"

           export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
-          LLAMA_STACK_CONFIG=./llama_stack/templates/${{ matrix.provider }}/run.yaml pytest --md-report --md-report-verbose=1 ./tests/client-sdk/inference/test_inference.py --md-report-output "$REPORT_OUTPUT"
+          LLAMA_STACK_CONFIG=./llama_stack/templates/${{ matrix.provider }}/run.yaml pytest --md-report --md-report-verbose=1 ./tests/client-sdk/inference/ --md-report-output "$REPORT_OUTPUT"

       - name: Output reports to the job summary
         if: always()
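Since CI now exercises the whole `tests/client-sdk/inference/` directory rather than a single file, a local reproduction of the job step looks roughly like this (the `fireworks` template stands in for whatever `matrix.provider` selects):

```bash
export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
LLAMA_STACK_CONFIG=./llama_stack/templates/fireworks/run.yaml \
  pytest -s -v ./tests/client-sdk/inference/
```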
.pre-commit-config.yaml

@@ -48,6 +48,7 @@ repos:
     hooks:
       - id: uv-export
         args: ["--frozen", "--no-hashes", "--no-emit-project"]
+      - id: uv-sync

 # - repo: https://github.com/pre-commit/mirrors-mypy
 #   rev: v1.14.0
CHANGELOG.md (44 changes, file deleted)

@@ -1,44 +0,0 @@
-# Changelog
-
-## 0.2.0
-
-### Added
-
-### Changed
-
-### Removed
-
-
-## 0.0.53
-
-### Added
-- Resource-oriented design for models, shields, memory banks, datasets and eval tasks
-- Persistence for registered objects with distribution
-- Ability to persist memory banks created for FAISS
-- PostgreSQL KVStore implementation
-- Environment variable placeholder support in run.yaml files
-- Comprehensive Zero-to-Hero notebooks and quickstart guides
-- Support for quantized models in Ollama
-- Vision models support for Together, Fireworks, Meta-Reference, and Ollama, and vLLM
-- Bedrock distribution with safety shields support
-- Evals API with task registration and scoring functions
-- MMLU and SimpleQA benchmark scoring functions
-- Huggingface dataset provider integration for benchmarks
-- Support for custom dataset registration from local paths
-- Benchmark evaluation CLI tools with visualization tables
-- RAG evaluation scoring functions and metrics
-- Local persistence for datasets and eval tasks
-
-### Changed
-- Split safety into distinct providers (llama-guard, prompt-guard, code-scanner)
-- Changed provider naming convention (`impls` → `inline`, `adapters` → `remote`)
-- Updated API signatures for dataset and eval task registration
-- Restructured folder organization for providers
-- Enhanced Docker build configuration
-- Added version prefixing for REST API routes
-- Enhanced evaluation task registration workflow
-- Improved benchmark evaluation output formatting
-- Restructured evals folder organization for better modularity
-
-### Removed
-- `llama stack configure` command
README.md (32 changes)

@@ -34,22 +34,22 @@ By reducing friction and complexity, Llama Stack empowers developers to focus on
 ### API Providers
 Here is a list of the various API providers and available distributions that help developers get started easily,

-| **API Provider Builder** | **Environments**       | **Agents**         | **Inference**      | **Memory**         | **Safety**         | **Telemetry**      |
-|:------------------------:|:----------------------:|:------------------:|:------------------:|:------------------:|:------------------:|:------------------:|
-| Meta Reference           | Single Node            | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
-| SambaNova                | Hosted                 |                    | :heavy_check_mark: |                    |                    |                    |
-| Cerebras                 | Hosted                 |                    | :heavy_check_mark: |                    |                    |                    |
-| Fireworks                | Hosted                 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |                    |                    |
-| AWS Bedrock              | Hosted                 |                    | :heavy_check_mark: |                    | :heavy_check_mark: |                    |
-| Together                 | Hosted                 | :heavy_check_mark: | :heavy_check_mark: |                    | :heavy_check_mark: |                    |
-| Groq                     | Hosted                 |                    | :heavy_check_mark: |                    |                    |                    |
-| Ollama                   | Single Node            |                    | :heavy_check_mark: |                    |                    |                    |
-| TGI                      | Hosted and Single Node |                    | :heavy_check_mark: |                    |                    |                    |
-| NVIDIA NIM               | Hosted and Single Node |                    | :heavy_check_mark: |                    |                    |                    |
-| Chroma                   | Single Node            |                    |                    | :heavy_check_mark: |                    |                    |
-| PG Vector                | Single Node            |                    |                    | :heavy_check_mark: |                    |                    |
-| PyTorch ExecuTorch       | On-device iOS          | :heavy_check_mark: | :heavy_check_mark: |                    |                    |                    |
-| vLLM                     | Hosted and Single Node |                    | :heavy_check_mark: |                    |                    |                    |
+| **API Provider Builder** | **Environments**       | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** |
+|:------------------------:|:----------------------:|:----------:|:-------------:|:----------:|:----------:|:-------------:|
+| Meta Reference           | Single Node            | ✅         | ✅            | ✅         | ✅         | ✅            |
+| SambaNova                | Hosted                 |            | ✅            |            |            |               |
+| Cerebras                 | Hosted                 |            | ✅            |            |            |               |
+| Fireworks                | Hosted                 | ✅         | ✅            | ✅         |            |               |
+| AWS Bedrock              | Hosted                 |            | ✅            |            | ✅         |               |
+| Together                 | Hosted                 | ✅         | ✅            |            | ✅         |               |
+| Groq                     | Hosted                 |            | ✅            |            |            |               |
+| Ollama                   | Single Node            |            | ✅            |            |            |               |
+| TGI                      | Hosted and Single Node |            | ✅            |            |            |               |
+| NVIDIA NIM               | Hosted and Single Node |            | ✅            |            |            |               |
+| Chroma                   | Single Node            |            |               | ✅         |            |               |
+| PG Vector                | Single Node            |            |               | ✅         |            |               |
+| PyTorch ExecuTorch       | On-device iOS          | ✅         | ✅            |            |            |               |
+| vLLM                     | Hosted and Single Node |            | ✅            |            |            |               |

 ### Distributions

@@ -69,6 +69,40 @@
   "fiddlecube": [
     "httpx"
   ],
+  "dell": [
+    "aiohttp",
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "datasets",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "huggingface_hub",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
   "fireworks": [
     "aiosqlite",
     "autoevals",
@@ -255,6 +289,38 @@
     "sentence-transformers --no-deps",
     "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
   ],
+  "nvidia": [
+    "aiosqlite",
+    "autoevals",
+    "blobfile",
+    "chardet",
+    "datasets",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "mcp",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
   "ollama": [
     "aiohttp",
     "aiosqlite",
@@ -322,6 +388,36 @@
     "sentence-transformers --no-deps",
     "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
   ],
+  "sambanova": [
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
+  ],
   "tgi": [
     "aiohttp",
     "aiosqlite",
@@ -424,101 +520,5 @@
     "vllm",
     "sentence-transformers --no-deps",
     "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "nvidia": [
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "datasets",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "matplotlib",
-    "mcp",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pypdf",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "sambanova": [
-    "aiosqlite",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pypdf",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ],
-  "dell": [
-    "aiohttp",
-    "aiosqlite",
-    "autoevals",
-    "blobfile",
-    "chardet",
-    "chromadb-client",
-    "datasets",
-    "faiss-cpu",
-    "fastapi",
-    "fire",
-    "httpx",
-    "huggingface_hub",
-    "matplotlib",
-    "nltk",
-    "numpy",
-    "openai",
-    "opentelemetry-exporter-otlp-proto-http",
-    "opentelemetry-sdk",
-    "pandas",
-    "pillow",
-    "psycopg2-binary",
-    "pypdf",
-    "redis",
-    "requests",
-    "scikit-learn",
-    "scipy",
-    "sentencepiece",
-    "tqdm",
-    "transformers",
-    "uvicorn",
-    "sentence-transformers --no-deps",
-    "torch torchvision --index-url https://download.pytorch.org/whl/cpu"
-  ]
+  ]
 }
docs/conftest.py (new file, 9 lines)

@@ -0,0 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+def pytest_collection_modifyitems(items):
+    for item in items:
+        item.name = item.name.replace(' ', '_')
@@ -86,7 +86,6 @@
     "# NBVAL_SKIP\n",
     "\n",
     "!apt-get install -y bubblewrap\n",
-    "# install a branch of llama stack\n",
     "import os\n",
     "os.environ[\"UV_SYSTEM_PYTHON\"] = \"1\"\n",
     "!pip install uv\n",
@@ -3397,6 +3396,231 @@
     "response = client.scoring.score(input_rows=rows, scoring_functions=scoring_params)\n",
     "pprint(response)\n"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "ad077440",
+   "metadata": {},
+   "source": [
+    "## 4. Image Understanding with Llama 3.2\n",
+    "\n",
+    "Below is a complete example of using Together's Llama Stack 0.1 server at https://llama-stack.together.ai to ask Llama 3.2 questions about an image."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "82e381ec",
+   "metadata": {},
+   "source": [
+    "### 4.1 Setup and helpers\n",
+    "\n",
+    "Below we install the Llama Stack client 0.1, download the example image, define two image helpers, and set Llama Stack Together server URL and Llama 3.2 model name.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "865fc5a8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install llama-stack-client==0.1.0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "44e05e16",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!wget https://raw.githubusercontent.com/meta-llama/llama-models/refs/heads/main/Llama_Repo.jpeg"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "469750f7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from PIL import Image\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "def display_image(path):\n",
+    "    img = Image.open(path)\n",
+    "    plt.imshow(img)\n",
+    "    plt.axis('off')\n",
+    "    plt.show()\n",
+    "\n",
+    "display_image(\"Llama_Repo.jpeg\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a2c1e1c2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import base64\n",
+    "\n",
+    "def encode_image(image_path):\n",
+    "    with open(image_path, \"rb\") as image_file:\n",
+    "        base64_string = base64.b64encode(image_file.read()).decode(\"utf-8\")\n",
+    "        base64_url = f\"data:image/png;base64,{base64_string}\"\n",
+    "        return base64_url"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c565f99e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_stack_client import LlamaStackClient\n",
+    "\n",
+    "LLAMA_STACK_API_TOGETHER_URL=\"https://llama-stack.together.ai\"\n",
+    "LLAMA32_11B_INSTRUCT = \"meta-llama/Llama-3.2-11B-Vision-Instruct\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7737cd41",
+   "metadata": {},
+   "source": [
+    "### 4.2 Using Llama Stack Chat API\n",
+    "\n",
+    "The code below uses the Llama Stack 0.1's chat API to interact with Llama 3.2:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d7914894",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_stack_client.lib.inference.event_logger import EventLogger\n",
+    "\n",
+    "async def run_main(image_path: str, prompt):\n",
+    "    client = LlamaStackClient(\n",
+    "        base_url=LLAMA_STACK_API_TOGETHER_URL,\n",
+    "    )\n",
+    "\n",
+    "    message = {\n",
+    "        \"role\": \"user\",\n",
+    "        \"content\": [\n",
+    "            {\n",
+    "                \"type\": \"image\",\n",
+    "                \"image\": {\n",
+    "                    \"url\": {\n",
+    "                        \"uri\": encode_image(image_path)\n",
+    "                    }\n",
+    "                }\n",
+    "            },\n",
+    "            {\n",
+    "                \"type\": \"text\",\n",
+    "                \"text\": prompt,\n",
+    "            }\n",
+    "        ]\n",
+    "    }\n",
+    "\n",
+    "    response = client.inference.chat_completion(\n",
+    "        messages=[message],\n",
+    "        model_id=LLAMA32_11B_INSTRUCT,\n",
+    "        stream=False,\n",
+    "    )\n",
+    "\n",
+    "    print(response.completion_message.content.lower().strip())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4ee09b97",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "await run_main(\"Llama_Repo.jpeg\",\n",
+    "        \"How many different colors are those llamas?\\\n",
+    "        What are those colors?\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e741d7b9",
+   "metadata": {},
+   "source": [
+    "### 4.3 Using Llama Stack Agent API\n",
+    "\n",
+    "The code below uses the Llama Stack 0.1's Agent API to interact with Llama 3.2:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f9a83275",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_stack_client.lib.agents.agent import Agent\n",
+    "from llama_stack_client.lib.agents.event_logger import EventLogger\n",
+    "from llama_stack_client.types.agent_create_params import AgentConfig\n",
+    "\n",
+    "async def run_main(image_path, prompt):\n",
+    "    base64_image = encode_image(image_path)\n",
+    "\n",
+    "    client = LlamaStackClient(\n",
+    "        base_url=LLAMA_STACK_API_TOGETHER_URL,\n",
+    "    )\n",
+    "\n",
+    "    agent_config = AgentConfig(\n",
+    "        model=LLAMA32_11B_INSTRUCT,\n",
+    "        instructions=\"You are a helpful assistant\",\n",
+    "        enable_session_persistence=False,\n",
+    "    )\n",
+    "\n",
+    "    agent = Agent(client, agent_config)\n",
+    "    session_id = agent.create_session(\"test-session\")\n",
+    "\n",
+    "    response = agent.create_turn(\n",
+    "        messages=[{\n",
+    "            \"role\": \"user\",\n",
+    "            \"content\": [\n",
+    "                {\n",
+    "                    \"type\": \"image\",\n",
+    "                    \"image\": {\n",
+    "                        \"url\": {\n",
+    "                            \"uri\": encode_image(image_path)\n",
+    "                        }\n",
+    "                    }\n",
+    "                },\n",
+    "                {\n",
+    "                    \"type\": \"text\",\n",
+    "                    \"text\": prompt,\n",
+    "                }\n",
+    "            ]\n",
+    "        }],\n",
+    "        session_id=session_id,\n",
+    "    )\n",
+    "\n",
+    "    for log in EventLogger().log(response):\n",
+    "        log.print()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "15d0098b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "await run_main(\"Llama_Repo.jpeg\",\n",
+    "        \"How many different colors are those llamas?\\\n",
+    "        What are those colors?\")"
+   ]
+  }
  ],
  "metadata": {
@@ -4,7 +4,7 @@ Llama Stack provides all the building blocks needed to create sophisticated AI applications.
 
 The best way to get started is to look at this notebook which walks through the various APIs (from basic inference, to RAG agents) and how to use them.
 
-**Notebook**: [Building AI Applications](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb)
+**Notebook**: [Building AI Applications](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)
 
 Here are some key topics that will help you build effective agents:
 
@@ -36,13 +36,12 @@ chunks = [
         "content": "Your document text here",
         "mime_type": "text/plain",
     },
-    ...,
 ]
-client.vector_io.insert(vector_db_id, chunks)
+client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks)
 
 # You can then query for these chunks
 chunks_response = client.vector_io.query(
-    vector_db_id, query="What do you know about..."
+    vector_db_id=vector_db_id, query="What do you know about..."
 )
 ```
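The snippet above assumes `vector_db_id` refers to an already-registered vector database; with the 0.1-era client that registration looks roughly like the following sketch (the embedding model name and dimension are illustrative):

```python
# Register a vector database before inserting chunks (sketch)
client.vector_dbs.register(
    vector_db_id=vector_db_id,
    embedding_model="all-MiniLM-L6-v2",
    embedding_dimension=384,
)
```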
@@ -72,8 +71,8 @@ client.tool_runtime.rag_tool.insert(
 
 # Query documents
 results = client.tool_runtime.rag_tool.query(
-    vector_db_id=vector_db_id,
-    query="What do you know about...",
+    vector_db_ids=[vector_db_id],
+    content="What do you know about...",
 )
 ```
@@ -82,10 +81,14 @@ results = client.tool_runtime.rag_tool.query(
 One of the most powerful patterns is combining agents with RAG capabilities. Here's a complete example:
 
 ```python
 from llama_stack_client.types.agent_create_params import AgentConfig
+from llama_stack_client.lib.agents.agent import Agent
 
 # Configure agent with memory
 agent_config = AgentConfig(
-    model="Llama3.2-3B-Instruct",
+    model="meta-llama/Llama-3.2-3B-Instruct",
     instructions="You are a helpful assistant",
+    enable_session_persistence=False,
     toolgroups=[
         {
             "name": "builtin::rag",
@@ -105,10 +108,10 @@ response = agent.create_turn(
         {"role": "user", "content": "I am providing some documents for reference."}
     ],
     documents=[
-        dict(
-            content="https://raw.githubusercontent.com/example/doc.rst",
-            mime_type="text/plain",
-        )
+        {
+            "content": "https://raw.githubusercontent.com/example/doc.rst",
+            "mime_type": "text/plain",
+        }
     ],
     session_id=session_id,
 )
@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 
 # Dell Distribution of Llama Stack
 
@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # Fireworks Distribution
 
 ```{toctree}
@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # Meta Reference Distribution
 
 ```{toctree}
@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # Meta Reference Quantized Distribution
 
 ```{toctree}
@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # Ollama Distribution
 
 ```{toctree}
@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # Remote vLLM Distribution
 ```{toctree}
 :maxdepth: 2
@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # SambaNova Distribution
 
 ```{toctree}
@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 
 # TGI Distribution
 
@@ -1,7 +1,7 @@
-<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 ---
 orphan: true
 ---
+<!-- This file was auto-generated by distro_codegen.py, please edit source -->
 # Together Distribution
 
 ```{toctree}
@@ -2,7 +2,7 @@
 ```{admonition} News
 :class: tip
 
-Llama Stack 0.1.1 is now available! See the [release notes](https://github.com/meta-llama/llama-stack/releases/tag/v0.1.1) for more details.
+Llama Stack 0.1.2 is now available! See the [release notes](https://github.com/meta-llama/llama-stack/releases/tag/v0.1.2) for more details.
 ```
 
 # Llama Stack
@@ -22,9 +22,9 @@ class StackListProviders(Subcommand):
         self.parser.set_defaults(func=self._run_providers_list_cmd)
 
     def _add_arguments(self):
-        from llama_stack.distribution.datatypes import Api
+        from llama_stack.distribution.distribution import providable_apis
 
-        api_values = [a.value for a in Api]
+        api_values = [api.value for api in providable_apis()]
         self.parser.add_argument(
             "api",
             type=str,
@@ -55,6 +55,16 @@ class StackRun(Subcommand):
             default=[],
             metavar="KEY=VALUE",
         )
+        self.parser.add_argument(
+            "--tls-keyfile",
+            type=str,
+            help="Path to TLS key file for HTTPS",
+        )
+        self.parser.add_argument(
+            "--tls-certfile",
+            type=str,
+            help="Path to TLS certificate file for HTTPS",
+        )
 
     def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
         import importlib.resources
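With these flags wired through, serving a distribution over HTTPS from the CLI becomes a matter of pointing at a key/cert pair (the paths below are placeholders):

```bash
llama stack run ./llama_stack/templates/together/run.yaml \
  --tls-keyfile /path/to/key.pem \
  --tls-certfile /path/to/cert.pem
```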
@@ -178,4 +188,7 @@ class StackRun(Subcommand):
                 return
             run_args.extend(["--env", f"{key}={value}"])
 
+        if args.tls_keyfile and args.tls_certfile:
+            run_args.extend(["--tls-keyfile", args.tls_keyfile, "--tls-certfile", args.tls_certfile])
+
         run_with_pty(run_args)
@@ -117,6 +117,23 @@ class Provider(BaseModel):
     config: Dict[str, Any]
 
 
+class ServerConfig(BaseModel):
+    port: int = Field(
+        default=8321,
+        description="Port to listen on",
+        ge=1024,
+        le=65535,
+    )
+    tls_certfile: Optional[str] = Field(
+        default=None,
+        description="Path to TLS certificate file for HTTPS",
+    )
+    tls_keyfile: Optional[str] = Field(
+        default=None,
+        description="Path to TLS key file for HTTPS",
+    )
+
+
 class StackRunConfig(BaseModel):
     version: str = LLAMA_STACK_RUN_CONFIG_VERSION
 
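Given the defaults above, the new `server` section of a run.yaml stays optional; a sketch of what it can carry (certificate paths are placeholders):

```yaml
server:
  port: 8321
  tls_certfile: /path/to/cert.pem  # optional; HTTPS is enabled when both TLS paths are set
  tls_keyfile: /path/to/key.pem
```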
@@ -159,6 +176,11 @@ a default SQLite store will be used.""",
     eval_tasks: List[EvalTaskInput] = Field(default_factory=list)
     tool_groups: List[ToolGroupInput] = Field(default_factory=list)
 
+    server: ServerConfig = Field(
+        default_factory=ServerConfig,
+        description="Configuration for the HTTP(S) server",
+    )
+
 
 class BuildConfig(BaseModel):
     version: str = LLAMA_STACK_BUILD_CONFIG_VERSION
@@ -17,17 +17,6 @@ from typing import Any, get_args, get_origin, Optional, TypeVar
 
 import httpx
 import yaml
-from llama_stack_client import (
-    APIResponse,
-    AsyncAPIResponse,
-    AsyncLlamaStackClient,
-    AsyncStream,
-    LlamaStackClient,
-    NOT_GIVEN,
-)
-from pydantic import BaseModel, TypeAdapter
-from rich.console import Console
-from termcolor import cprint
 
 from llama_stack.distribution.build import print_pip_install_help
 from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
@@ -46,6 +35,17 @@ from llama_stack.providers.utils.telemetry.tracing import (
     setup_logger,
     start_trace,
 )
+from llama_stack_client import (
+    APIResponse,
+    AsyncAPIResponse,
+    AsyncLlamaStackClient,
+    AsyncStream,
+    LlamaStackClient,
+    NOT_GIVEN,
+)
+from pydantic import BaseModel, TypeAdapter
+from rich.console import Console
+from termcolor import cprint
 
 T = TypeVar("T")
 
@@ -198,6 +198,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
 
     async def initialize(self) -> bool:
         try:
+            self.endpoint_impls = None
             self.impls = await construct_stack(self.config, self.custom_provider_registry)
         except ModuleNotFoundError as _e:
             cprint(_e.msg, "red")
@@ -213,7 +214,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
                 f"Please run:\n\n{prefix}llama stack build --template {self.config_path_or_template_name} --image-type venv\n\n",
                 "yellow",
             )
-            return False
+            raise _e
 
         if Api.telemetry in self.impls:
             setup_logger(self.impls[Api.telemetry])
@@ -282,8 +282,19 @@ def main():
         action="append",
         help="Environment variables in KEY=value format. Can be specified multiple times.",
     )
+    parser.add_argument(
+        "--tls-keyfile",
+        help="Path to TLS key file for HTTPS",
+        required="--tls-certfile" in sys.argv,
+    )
+    parser.add_argument(
+        "--tls-certfile",
+        help="Path to TLS certificate file for HTTPS",
+        required="--tls-keyfile" in sys.argv,
+    )
 
     args = parser.parse_args()
 
     if args.env:
         for env_pair in args.env:
             try:
@@ -381,11 +392,36 @@ def main():
 
     import uvicorn
 
-    # FYI this does not do hot-reloads
+    # Configure SSL if certificates are provided
+    port = args.port or config.server.port
+
+    ssl_config = None
+    if args.tls_keyfile:
+        keyfile = args.tls_keyfile
+        certfile = args.tls_certfile
+    else:
+        keyfile = config.server.tls_keyfile
+        certfile = config.server.tls_certfile
+
+    if keyfile and certfile:
+        ssl_config = {
+            "ssl_keyfile": keyfile,
+            "ssl_certfile": certfile,
+        }
+        print(f"HTTPS enabled with certificates:\n  Key: {keyfile}\n  Cert: {certfile}")
+
     listen_host = ["::", "0.0.0.0"] if not args.disable_ipv6 else "0.0.0.0"
-    print(f"Listening on {listen_host}:{args.port}")
-    uvicorn.run(app, host=listen_host, port=args.port)
+    print(f"Listening on {listen_host}:{port}")
+
+    uvicorn_config = {
+        "app": app,
+        "host": listen_host,
+        "port": port,
+    }
+    if ssl_config:
+        uvicorn_config.update(ssl_config)
+
+    uvicorn.run(**uvicorn_config)
 
 
 def extract_path_params(route: str) -> List[str]:
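For trying the HTTPS path locally, a self-signed pair is enough; one way to produce it and start the server with it (an illustration, not part of this commit):

```bash
# Create a self-signed key/cert pair for localhost testing
openssl req -x509 -newkey rsa:4096 -sha256 -days 365 -nodes \
  -keyout key.pem -out cert.pem -subj "/CN=localhost"
# Start the server with TLS enabled
python -m llama_stack.distribution.server.server \
  --yaml-config ./run.yaml --port 8321 \
  --tls-keyfile key.pem --tls-certfile cert.pem
```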
@@ -34,6 +34,7 @@ shift
 
 # Process environment variables from --env arguments
 env_vars=""
+other_args=""
 while [[ $# -gt 0 ]]; do
   case "$1" in
     --env)
@@ -48,6 +49,7 @@ while [[ $# -gt 0 ]]; do
       fi
       ;;
     *)
+      other_args="$other_args $1"
      shift
      ;;
   esac
@@ -61,4 +63,5 @@ $CONDA_PREFIX/bin/python \
   -m llama_stack.distribution.server.server \
   --yaml-config "$yaml_config" \
   --port "$port" \
-  $env_vars
+  $env_vars \
+  $other_args
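The practical effect is that anything after the `--env` pairs now reaches the server process, so the TLS flags above can be forwarded through the launcher; a hypothetical invocation (the script name and argument order are assumptions for illustration):

```bash
# Unrecognized args accumulate in $other_args and are appended to the python command
./run_with_conda.sh my-stack-env ./run.yaml 8321 \
  --env TOGETHER_API_KEY=... \
  --tls-keyfile key.pem --tls-certfile cert.pem
```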
@@ -40,8 +40,12 @@ shift
 port="$1"
 shift
 
+# Initialize other_args
+other_args=""
+
 # Process environment variables from --env arguments
 env_vars=""
+
 while [[ $# -gt 0 ]]; do
   case "$1" in
     --env)
@@ -55,6 +59,7 @@ while [[ $# -gt 0 ]]; do
       fi
      ;;
     *)
+      other_args="$other_args $1"
      shift
      ;;
   esac
@@ -93,5 +98,8 @@ $CONTAINER_BINARY run $CONTAINER_OPTS -it \
   -v "$yaml_config:/app/config.yaml" \
   $mounts \
   --env LLAMA_STACK_PORT=$port \
-  --entrypoint='["python", "-m", "llama_stack.distribution.server.server", "--yaml-config", "/app/config.yaml"]' \
-  $container_image:$version_tag
+  --entrypoint python \
+  $container_image:$version_tag \
+  -m llama_stack.distribution.server.server \
+  --yaml-config /app/config.yaml \
+  $other_args
@@ -67,7 +67,6 @@ def generate_bwrap_command(bind_dirs: List[str]) -> str:
 @dataclass
 class CodeExecutionContext:
     matplotlib_dump_dir: str
-    use_proxy: bool = False
 
 
 @dataclass
@@ -26,6 +26,7 @@ from llama_stack.apis.inference import (
     Message,
     ResponseFormat,
     ToolChoice,
+    ToolConfig,
 )
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
 from llama_stack.providers.remote.inference.groq.config import GroqConfig
@@ -352,24 +352,20 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
         return EmbeddingsResponse(embeddings=embeddings)
 
     async def register_model(self, model: Model) -> Model:
-        # ollama does not have embedding models running. Check if the model is in list of available models.
-        if model.model_type == ModelType.embedding:
-            response = await self.client.list()
+        async def check_model_availability(model_id: str):
+            response = await self.client.ps()
             available_models = [m["model"] for m in response["models"]]
-            if model.provider_resource_id not in available_models:
+            if model_id not in available_models:
                 raise ValueError(
-                    f"Model '{model.provider_resource_id}' is not available in Ollama. "
-                    f"Available models: {', '.join(available_models)}"
+                    f"Model '{model_id}' is not available in Ollama. Available models: {', '.join(available_models)}"
                 )
+
+        if model.model_type == ModelType.embedding:
+            await check_model_availability(model.provider_resource_id)
             return model
 
         model = await self.register_helper.register_model(model)
-        models = await self.client.ps()
-        available_models = [m["model"] for m in models["models"]]
-        if model.provider_resource_id not in available_models:
-            raise ValueError(
-                f"Model '{model.provider_resource_id}' is not available in Ollama. "
-                f"Available models: {', '.join(available_models)}"
-            )
+        await check_model_availability(model.provider_resource_id)
 
         return model
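Because the adapter now checks `client.ps()` (models currently loaded) in both branches, a model that is merely pulled but not running will fail registration; warming it up first avoids that (the model tag below is illustrative):

```bash
ollama pull llama3.1:8b    # fetch the model
ollama run llama3.1:8b ""  # load it into memory so it shows up in `ollama ps`
ollama ps                  # the adapter's availability check mirrors this list
```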
@@ -12,8 +12,8 @@ from .config import QdrantConfig
 
 
 async def get_adapter_impl(config: QdrantConfig, deps: Dict[Api, ProviderSpec]):
-    from .qdrant import QdrantVectorMemoryAdapter
+    from .qdrant import QdrantVectorDBAdapter
 
-    impl = QdrantVectorMemoryAdapter(config, deps[Api.inference])
+    impl = QdrantVectorDBAdapter(config, deps[Api.inference])
     await impl.initialize()
     return impl
@@ -55,7 +55,7 @@ class QdrantIndex(EmbeddingIndex):
 
         points = []
         for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
-            chunk_id = f"{chunk.document_id}:chunk-{i}"
+            chunk_id = f"{chunk.metadata['document_id']}:chunk-{i}"
             points.append(
                 PointStruct(
                     id=convert_id(chunk_id),
@@ -93,6 +93,9 @@ class QdrantIndex(EmbeddingIndex):
 
         return QueryChunksResponse(chunks=chunks, scores=scores)
 
+    async def delete(self):
+        await self.client.delete_collection(collection_name=self.collection_name)
+
 
 class QdrantVectorDBAdapter(VectorIO, VectorDBsProtocolPrivate):
     def __init__(self, config: QdrantConfig, inference_api: Api.inference) -> None:
@@ -95,7 +95,7 @@ class TestDatasetIO:
         assert len(response) == 1
         assert response[0].identifier == "test_dataset"
 
-        with pytest.raises(Exception) as exc_info:
+        with pytest.raises(ValueError):
             # unregister a dataset that does not exist
             await datasets_impl.unregister_dataset("test_dataset2")
 
@@ -104,7 +104,7 @@ class TestDatasetIO:
         assert isinstance(response, list)
         assert len(response) == 0
 
-        with pytest.raises(Exception) as exc_info:
+        with pytest.raises(ValueError):
             await datasets_impl.unregister_dataset("test_dataset")
 
     @pytest.mark.asyncio
@@ -32,7 +32,7 @@ class TestModelRegistration:
         )
 
         # Try to register a model that's too large for local inference
-        with pytest.raises(ValueError) as exc_info:
+        with pytest.raises(ValueError):
             await models_impl.register_model(
                 model_id="Llama3.1-70B-Instruct",
             )
@@ -42,7 +42,7 @@ class TestModelRegistration:
         _, models_impl = inference_stack
 
         # Try to register a non-existent model
-        with pytest.raises(Exception) as exc_info:
+        with pytest.raises(ValueError):
             await models_impl.register_model(
                 model_id="Llama3-NonExistent-Model",
             )
@@ -59,7 +59,7 @@ class TestModelRegistration:
             },
         )
 
-        with pytest.raises(ValueError) as exc_info:
+        with pytest.raises(ValueError):
             await models_impl.register_model(
                 model_id="custom-model-2",
                 metadata={
@@ -88,7 +88,7 @@ class TestModelRegistration:
     async def test_register_with_invalid_llama_model(self, inference_stack):
         _, models_impl = inference_stack
 
-        with pytest.raises(ValueError) as exc_info:
+        with pytest.raises(ValueError):
             await models_impl.register_model(
                 model_id="custom-model-2",
                 metadata={"llama_model": "invalid-llama-model"},
@@ -4,12 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+import base64
 from pathlib import Path
 
 import pytest
 
-from llama_stack.apis.common.content_types import ImageContentItem, TextContentItem, URL
-
+from llama_stack.apis.common.content_types import URL, ImageContentItem, TextContentItem
 from llama_stack.apis.inference import (
     ChatCompletionResponse,
     ChatCompletionResponseEventType,
@@ -23,7 +23,7 @@ from .utils import group_chunks
 THIS_DIR = Path(__file__).parent
 
 with open(THIS_DIR / "pasta.jpeg", "rb") as f:
-    PASTA_IMAGE = f.read()
+    PASTA_IMAGE = base64.b64encode(f.read()).decode("utf-8")
 
 
 class TestVisionModelInference:
@@ -29,7 +29,7 @@ def find_template_dirs(templates_dir: Path) -> Iterator[Path]:
     if not templates_dir.exists():
         raise FileNotFoundError(f"Templates directory not found: {templates_dir}")
 
-    return (d for d in templates_dir.iterdir() if d.is_dir() and d.name != "__pycache__")
+    return sorted(d for d in templates_dir.iterdir() if d.is_dir() and d.name != "__pycache__")
 
 
 def process_template(template_dir: Path, progress) -> None:
@@ -115,3 +115,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -117,3 +117,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -116,3 +116,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -107,3 +107,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -172,3 +172,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -161,3 +161,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -124,3 +124,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -114,3 +114,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -124,3 +124,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -114,3 +114,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -126,3 +126,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -115,3 +115,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -117,3 +117,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -147,3 +147,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321
@@ -16,7 +16,7 @@ The `llamastack/distribution-{{ name }}` distribution consists of the following
 
 You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.
 
-{%- if run_config_env_vars %}
+{% if run_config_env_vars %}
 ### Environment Variables
 
 The following environment variables can be configured:
@@ -121,3 +121,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -110,3 +110,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -126,3 +126,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -115,3 +115,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -126,3 +126,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321
@@ -131,8 +131,15 @@ class DistributionTemplate(BaseModel):
             providers_str = ", ".join(f"`{p}`" for p in providers)
             providers_table += f"| {api} | {providers_str} |\n"
 
-        template = "<!-- This file was auto-generated by distro_codegen.py, please edit source -->\n"
-        template += self.template_path.read_text()
+        template = self.template_path.read_text()
+        comment = "<!-- This file was auto-generated by distro_codegen.py, please edit source -->\n"
+        orphantext = "---\norphan: true\n---\n"
+
+        if template.startswith(orphantext):
+            template = template.replace(orphantext, orphantext + comment)
+        else:
+            template = comment + template
+
         # Render template with rich-generated table
         env = jinja2.Environment(
             trim_blocks=True,
@@ -114,3 +114,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -113,3 +113,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -167,3 +167,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -156,3 +156,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321

@@ -117,3 +117,5 @@ tool_groups:
     provider_id: rag-runtime
   - toolgroup_id: builtin::code_interpreter
     provider_id: code-interpreter
+server:
+  port: 8321
pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "llama_stack"
-version = "0.1.1"
+version = "0.1.2"
 authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
 description = "Llama Stack"
 readme = "README.md"

@@ -25,8 +25,8 @@ dependencies = [
     "fire",
     "httpx",
     "huggingface-hub",
-    "llama-models>=0.1.1",
-    "llama-stack-client>=0.1.1",
+    "llama-models>=0.1.2",
+    "llama-stack-client>=0.1.2",
     "prompt-toolkit",
     "python-dotenv",
     "pydantic>=2",
|
|
@ -4,6 +4,7 @@ annotated-types==0.7.0
|
|||
anyio==4.8.0
|
||||
blobfile==3.0.0
|
||||
certifi==2025.1.31
|
||||
chardet==5.2.0
|
||||
charset-normalizer==3.4.1
|
||||
click==8.1.8
|
||||
colorama==0.4.6 ; sys_platform == 'win32'
|
||||
|
@ -18,8 +19,8 @@ httpx==0.28.1
|
|||
huggingface-hub==0.28.1
|
||||
idna==3.10
|
||||
jinja2==3.1.5
|
||||
llama-models==0.1.1
|
||||
llama-stack-client==0.1.1
|
||||
llama-models==0.1.2
|
||||
llama-stack-client==0.1.2
|
||||
lxml==5.3.0
|
||||
markdown-it-py==3.0.0
|
||||
markupsafe==3.0.2
|
||||
|
@ -34,6 +35,7 @@ pycryptodomex==3.21.0
|
|||
pydantic==2.10.6
|
||||
pydantic-core==2.27.2
|
||||
pygments==2.19.1
|
||||
pypdf==5.2.0
|
||||
python-dateutil==2.9.0.post0
|
||||
python-dotenv==1.0.1
|
||||
pytz==2025.1
|
||||
|
|
|
tests/client-sdk/README.md

@@ -4,18 +4,18 @@ You can run llama stack integration tests on either a Llama Stack Library or a L
 To test on a Llama Stack library with certain configuration, run
 ```bash
 LLAMA_STACK_CONFIG=./llama_stack/templates/cerebras/run.yaml
-pytest -s -v tests/client-sdk/inference/test_inference.py
+pytest -s -v tests/client-sdk/inference/
 ```
 or just the template name
 ```bash
 LLAMA_STACK_CONFIG=together
-pytest -s -v tests/client-sdk/inference/test_inference.py
+pytest -s -v tests/client-sdk/inference/
 ```
 
 To test on a Llama Stack endpoint, run
 ```bash
 LLAMA_STACK_BASE_URL=http://localhost:8089
-pytest -s -v tests/client-sdk/inference/test_inference.py
+pytest -s -v tests/client-sdk/inference
 ```
 
 ## Report Generation
@@ -263,12 +263,14 @@ def test_custom_tool(llama_stack_client, agent_config):
     assert "CustomTool" in logs_str
 
 
-def test_override_system_message_behavior(llama_stack_client, agent_config):
+# TODO: fix this flaky test
+def xtest_override_system_message_behavior(llama_stack_client, agent_config):
     client_tool = TestClientTool()
     agent_config = {
         **agent_config,
         "instructions": "You are a pirate",
         "client_tools": [client_tool.get_tool_definition()],
+        "model": "meta-llama/Llama-3.2-3B-Instruct",
     }
 
     agent = Agent(llama_stack_client, agent_config, client_tools=(client_tool,))
tests/client-sdk/inference/test_inference.py

@@ -4,9 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-import base64
-import pathlib
-
 import pytest
 from pydantic import BaseModel
 

@@ -14,6 +11,7 @@ PROVIDER_TOOL_PROMPT_FORMAT = {
     "remote::ollama": "json",
     "remote::together": "json",
     "remote::fireworks": "json",
+    "remote::vllm": "json",
 }
 
 PROVIDER_LOGPROBS_TOP_K = set(

@@ -56,23 +54,6 @@ def get_weather_tool_definition():
     }
 
 
-@pytest.fixture
-def image_path():
-    return pathlib.Path(__file__).parent / "dog.png"
-
-
-@pytest.fixture
-def base64_image_data(image_path):
-    # Convert the image to base64
-    return base64.b64encode(image_path.read_bytes()).decode("utf-8")
-
-
-@pytest.fixture
-def base64_image_url(base64_image_data, image_path):
-    # suffix includes the ., so we remove it
-    return f"data:image/{image_path.suffix[1:]};base64,{base64_image_data}"
-
-
 def test_text_completion_non_streaming(llama_stack_client, text_model_id):
     response = llama_stack_client.inference.completion(
         content="Complete the sentence using one word: Roses are red, violets are ",

@@ -176,8 +157,8 @@ def test_text_completion_structured_output(llama_stack_client, text_model_id, inference_provider_type):
 @pytest.mark.parametrize(
     "question,expected",
     [
-        ("What are the names of planets in our solar system?", "Earth"),
-        ("What are the names of the planets that have rings around them?", "Saturn"),
+        ("Which planet do humans live on?", "Earth"),
+        ("Which planet has rings around it with a name starting with letter S?", "Saturn"),
     ],
 )
 def test_text_chat_completion_non_streaming(llama_stack_client, text_model_id, question, expected):

@@ -299,101 +280,3 @@ def test_text_chat_completion_structured_output(llama_stack_client, text_model_id):
     assert answer.last_name == "Jordan"
     assert answer.year_of_birth == 1963
     assert answer.num_seasons_in_nba == 15
-
-
-def test_image_chat_completion_non_streaming(llama_stack_client, vision_model_id):
-    message = {
-        "role": "user",
-        "content": [
-            {
-                "type": "image",
-                "image": {
-                    "url": {
-                        # TODO: Replace with Github based URI to resources/sample1.jpg
-                        "uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
-                    },
-                },
-            },
-            {
-                "type": "text",
-                "text": "Describe what is in this image.",
-            },
-        ],
-    }
-    response = llama_stack_client.inference.chat_completion(
-        model_id=vision_model_id,
-        messages=[message],
-        stream=False,
-    )
-    message_content = response.completion_message.content.lower().strip()
-    assert len(message_content) > 0
-    assert any(expected in message_content for expected in {"dog", "puppy", "pup"})
-
-
-def test_image_chat_completion_streaming(llama_stack_client, vision_model_id):
-    message = {
-        "role": "user",
-        "content": [
-            {
-                "type": "image",
-                "image": {
-                    "url": {
-                        # TODO: Replace with Github based URI to resources/sample1.jpg
-                        "uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
-                    },
-                },
-            },
-            {
-                "type": "text",
-                "text": "Describe what is in this image.",
-            },
-        ],
-    }
-    response = llama_stack_client.inference.chat_completion(
-        model_id=vision_model_id,
-        messages=[message],
-        stream=True,
-    )
-    streamed_content = ""
-    for chunk in response:
-        streamed_content += chunk.event.delta.text.lower()
-    assert len(streamed_content) > 0
-    assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})
-
-
-@pytest.mark.parametrize("type_", ["url", "data"])
-def test_image_chat_completion_base64(llama_stack_client, vision_model_id, base64_image_data, base64_image_url, type_):
-    image_spec = {
-        "url": {
-            "type": "image",
-            "image": {
-                "url": {
-                    "uri": base64_image_url,
-                },
-            },
-        },
-        "data": {
-            "type": "image",
-            "image": {
-                "data": base64_image_data,
-            },
-        },
-    }[type_]
-
-    message = {
-        "role": "user",
-        "content": [
-            image_spec,
-            {
-                "type": "text",
-                "text": "Describe what is in this image.",
-            },
-        ],
-    }
-    response = llama_stack_client.inference.chat_completion(
-        model_id=vision_model_id,
-        messages=[message],
-        stream=False,
-    )
-    message_content = response.completion_message.content.lower().strip()
-    assert len(message_content) > 0
tests/client-sdk/inference/test_vision_inference.py (new file, 133 lines)

@@ -0,0 +1,133 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import base64
+import pathlib
+
+import pytest
+
+
+@pytest.fixture(scope="session")
+def inference_provider_type(llama_stack_client):
+    providers = llama_stack_client.providers.list()
+    inference_providers = [p for p in providers if p.api == "inference"]
+    assert len(inference_providers) > 0, "No inference providers found"
+    return inference_providers[0].provider_type
+
+
+@pytest.fixture
+def image_path():
+    return pathlib.Path(__file__).parent / "dog.png"
+
+
+@pytest.fixture
+def base64_image_data(image_path):
+    # Convert the image to base64
+    return base64.b64encode(image_path.read_bytes()).decode("utf-8")
+
+
+@pytest.fixture
+def base64_image_url(base64_image_data, image_path):
+    # suffix includes the ., so we remove it
+    return f"data:image/{image_path.suffix[1:]};base64,{base64_image_data}"
+
+
+def test_image_chat_completion_non_streaming(llama_stack_client, vision_model_id):
+    message = {
+        "role": "user",
+        "content": [
+            {
+                "type": "image",
+                "image": {
+                    "url": {
+                        # TODO: Replace with Github based URI to resources/sample1.jpg
+                        "uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
+                    },
+                },
+            },
+            {
+                "type": "text",
+                "text": "Describe what is in this image.",
+            },
+        ],
+    }
+    response = llama_stack_client.inference.chat_completion(
+        model_id=vision_model_id,
+        messages=[message],
+        stream=False,
+    )
+    message_content = response.completion_message.content.lower().strip()
+    assert len(message_content) > 0
+    assert any(expected in message_content for expected in {"dog", "puppy", "pup"})
+
+
+def test_image_chat_completion_streaming(llama_stack_client, vision_model_id):
+    message = {
+        "role": "user",
+        "content": [
+            {
+                "type": "image",
+                "image": {
+                    "url": {
+                        # TODO: Replace with Github based URI to resources/sample1.jpg
+                        "uri": "https://www.healthypawspetinsurance.com/Images/V3/DogAndPuppyInsurance/Dog_CTA_Desktop_HeroImage.jpg"
+                    },
+                },
+            },
+            {
+                "type": "text",
+                "text": "Describe what is in this image.",
+            },
+        ],
+    }
+    response = llama_stack_client.inference.chat_completion(
+        model_id=vision_model_id,
+        messages=[message],
+        stream=True,
+    )
+    streamed_content = ""
+    for chunk in response:
+        streamed_content += chunk.event.delta.text.lower()
+    assert len(streamed_content) > 0
+    assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})
+
+
+@pytest.mark.parametrize("type_", ["url", "data"])
+def test_image_chat_completion_base64(llama_stack_client, vision_model_id, base64_image_data, base64_image_url, type_):
+    image_spec = {
+        "url": {
+            "type": "image",
+            "image": {
+                "url": {
+                    "uri": base64_image_url,
+                },
+            },
+        },
+        "data": {
+            "type": "image",
+            "image": {
+                "data": base64_image_data,
+            },
+        },
+    }[type_]
+
+    message = {
+        "role": "user",
+        "content": [
+            image_spec,
+            {
+                "type": "text",
+                "text": "Describe what is in this image.",
+            },
+        ],
+    }
+    response = llama_stack_client.inference.chat_completion(
+        model_id=vision_model_id,
+        messages=[message],
+        stream=False,
+    )
+    message_content = response.completion_message.content.lower().strip()
+    assert len(message_content) > 0
uv.lock (generated, 18 changes)

@@ -687,7 +687,7 @@ wheels = [
 
 [[package]]
 name = "llama-models"
-version = "0.1.1"
+version = "0.1.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "jinja2" },

@@ -696,14 +696,14 @@ dependencies = [
     { name = "pyyaml" },
     { name = "tiktoken" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/df/80/4a4595cf5e55f71c0e15b85ff2f4c04b0742bf664ede062a09c9d383bf7b/llama_models-0.1.1.tar.gz", hash = "sha256:7cb5a9fe38485b47aff4c93e183d6d390a676a7619f3355502576b652f17733a", size = 1608412 }
+sdist = { url = "https://files.pythonhosted.org/packages/b5/f2/ed8310d4677cd38ab45ffba45aea2a4e9882b640045ad9c3198ac69e5a85/llama_models-0.1.2.tar.gz", hash = "sha256:1266eaec7a8db336e4ed034d2b494189ccb7fd6d6b7aefe874eee749a4340b9b", size = 1608069 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d9/93/d49dd0f0cd37df1a7a7fb25444d010f626cdf42b21eea11d839b0f6a808a/llama_models-0.1.1-py3-none-any.whl", hash = "sha256:7e4f15dc4f6f011852ea2c42f9770b75140f5eca670b32cc67fc0a4605c55f89", size = 1650981 },
+    { url = "https://files.pythonhosted.org/packages/55/a7/34b9e88ef4109759c8881f43b8006139e3d13d54c440b8c571b253655f54/llama_models-0.1.2-py3-none-any.whl", hash = "sha256:8aa5287d1c6325698991ff677e71148cac347e07493bb5b3ab891e614b89e1f8", size = 1651273 },
 ]
 
 [[package]]
 name = "llama-stack"
-version = "0.1.1"
+version = "0.1.2"
 source = { editable = "." }
 dependencies = [
     { name = "blobfile" },

@@ -751,8 +751,8 @@ requires-dist = [
     { name = "fire" },
     { name = "httpx" },
     { name = "huggingface-hub" },
-    { name = "llama-models", specifier = ">=0.1.1" },
-    { name = "llama-stack-client", specifier = ">=0.1.1" },
+    { name = "llama-models", specifier = ">=0.1.2" },
+    { name = "llama-stack-client", specifier = ">=0.1.2" },
     { name = "myst-parser", marker = "extra == 'docs'" },
     { name = "nbval", marker = "extra == 'dev'" },
     { name = "pre-commit", marker = "extra == 'dev'" },

@@ -780,7 +780,7 @@ requires-dist = [
 
 [[package]]
 name = "llama-stack-client"
-version = "0.1.1"
+version = "0.1.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },

@@ -797,9 +797,9 @@ dependencies = [
     { name = "tqdm" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/07/42/7004958ac1a6da9a8060decf0d9120fdeb3b2775de090a0a473f2ee4a27d/llama_stack_client-0.1.1.tar.gz", hash = "sha256:3e549a848ade959d342fa52ec49b1913b7bb615a77b5b8dcaefe6ff94409049e", size = 179729 }
+sdist = { url = "https://files.pythonhosted.org/packages/9e/75/8b41a3026c871a8650cd8d2cfda9f891a9163458813574f36518bb40afe4/llama_stack_client-0.1.2.tar.gz", hash = "sha256:94277ddae52be557d771dcdc15d85af9012b5aa87439dd69ec1dc0ff486b0c8e", size = 188023 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/80/66/5255c09dc001ff437fd6fe6fad27142035b60073df243f7df0494095f605/llama_stack_client-0.1.1-py3-none-any.whl", hash = "sha256:e07d58fdcc1eaa370dd00b94c2dd1a8169c0ac60c37f6f2772cbc2c5b63f2e62", size = 348665 },
+    { url = "https://files.pythonhosted.org/packages/c4/32/3a3a97eecff1f1e3a1dc90e9b00681abea11ec4f43a7ca549981261e18b6/llama_stack_client-0.1.2-py3-none-any.whl", hash = "sha256:85ff0fb57a62d7d0470cfaa2b07a595c9fb3483297944d5e5a066db850d38ccd", size = 359415 },
 ]
 
 [[package]]