Merge branch 'meta-llama:main' into jwm4-add-qdrant-to-provider-tests

Bill Murdock 2025-02-10 20:43:11 -05:00 committed by GitHub
commit f9087d3a56
42 changed files with 207 additions and 144 deletions


@@ -23,3 +23,7 @@ jobs:
.pre-commit-config.yaml
- uses: pre-commit/action@v3.0.1
- name: Verify if there are any diff files after pre-commit
run: |
git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)


@@ -48,6 +48,7 @@ repos:
hooks:
- id: uv-export
args: ["--frozen", "--no-hashes", "--no-emit-project"]
- id: uv-sync
# - repo: https://github.com/pre-commit/mirrors-mypy
# rev: v1.14.0


@@ -34,22 +34,22 @@ By reducing friction and complexity, Llama Stack empowers developers to focus on
### API Providers
Here is a list of the various API providers and available distributions that help developers get started easily:
| **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** |
|:------------------------------------------------------------------------------------------:|:----------------------:|:------------------:|:------------------:|:------------------:|:------------------:|:------------------:|
| Meta Reference | Single Node | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
| SambaNova | Hosted | | :heavy_check_mark: | | | |
| Cerebras | Hosted | | :heavy_check_mark: | | | |
| Fireworks | Hosted | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | |
| AWS Bedrock | Hosted | | :heavy_check_mark: | | :heavy_check_mark: | |
| Together | Hosted | :heavy_check_mark: | :heavy_check_mark: | | :heavy_check_mark: | |
| Groq | Hosted | | :heavy_check_mark: | | | |
| Ollama | Single Node | | :heavy_check_mark: | | | |
| TGI | Hosted and Single Node | | :heavy_check_mark: | | | |
| NVIDIA NIM | Hosted and Single Node | | :heavy_check_mark: | | | |
| Chroma | Single Node | | | :heavy_check_mark: | | |
| PG Vector | Single Node | | | :heavy_check_mark: | | |
| PyTorch ExecuTorch | On-device iOS | :heavy_check_mark: | :heavy_check_mark: | | | |
| vLLM | Hosted and Single Node | | :heavy_check_mark: | | | |
| **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** |
|:------------------------:|:----------------------:|:----------:|:-------------:|:----------:|:----------:|:-------------:|
| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ |
| SambaNova | Hosted | | ✅ | | | |
| Cerebras | Hosted | | ✅ | | | |
| Fireworks | Hosted | ✅ | ✅ | ✅ | | |
| AWS Bedrock | Hosted | | ✅ | | ✅ | |
| Together | Hosted | ✅ | ✅ | | ✅ | |
| Groq | Hosted | | ✅ | | | |
| Ollama | Single Node | | ✅ | | | |
| TGI | Hosted and Single Node | | ✅ | | | |
| NVIDIA NIM | Hosted and Single Node | | ✅ | | | |
| Chroma | Single Node | | | ✅ | | |
| PG Vector | Single Node | | | ✅ | | |
| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | |
| vLLM | Hosted and Single Node | | ✅ | | | |
### Distributions


@@ -66,6 +66,40 @@
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"dell": [
"aiohttp",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"fireworks": [
"aiosqlite",
"autoevals",
@@ -252,6 +286,38 @@
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"nvidia": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"ollama": [
"aiohttp",
"aiosqlite",
@@ -319,6 +385,36 @@
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"sambanova": [
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"tgi": [
"aiohttp",
"aiosqlite",
@@ -421,101 +517,5 @@
"vllm",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"nvidia": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"sambanova": [
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"dell": [
"aiohttp",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
]
}


@@ -36,13 +36,12 @@ chunks = [
"content": "Your document text here",
"mime_type": "text/plain",
},
...,
]
client.vector_io.insert(vector_db_id, chunks)
client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks)
# You can then query for these chunks
chunks_response = client.vector_io.query(
vector_db_id, query="What do you know about..."
vector_db_id=vector_db_id, query="What do you know about..."
)
```
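For orientation, the keyword-argument form above can be exercised roughly as follows. This is a minimal sketch, not part of the change: the base URL, the `vector_db_id` value, and the reuse of the `chunks` list defined above are illustrative assumptions.

```python
from llama_stack_client import LlamaStackClient

# Illustrative endpoint; 8321 is the default server port used by the run configs in this commit.
client = LlamaStackClient(base_url="http://localhost:8321")

vector_db_id = "my_documents"  # assumes this vector DB has already been registered

# Insert and query with explicit keyword arguments, as in the updated docs above
# (`chunks` is the list shown earlier in this file).
client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks)
chunks_response = client.vector_io.query(
    vector_db_id=vector_db_id, query="What do you know about..."
)
```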
@@ -72,8 +71,8 @@ client.tool_runtime.rag_tool.insert(
# Query documents
results = client.tool_runtime.rag_tool.query(
vector_db_id=vector_db_id,
query="What do you know about...",
vector_db_ids=[vector_db_id],
content="What do you know about...",
)
```
@@ -82,10 +81,14 @@ results = client.tool_runtime.rag_tool.query(
One of the most powerful patterns is combining agents with RAG capabilities. Here's a complete example:
```python
from llama_stack_client.types.agent_create_params import AgentConfig
from llama_stack_client.lib.agents.agent import Agent
# Configure agent with memory
agent_config = AgentConfig(
model="Llama3.2-3B-Instruct",
model="meta-llama/Llama-3.2-3B-Instruct",
instructions="You are a helpful assistant",
enable_session_persistence=False,
toolgroups=[
{
"name": "builtin::rag",
@@ -105,10 +108,10 @@ response = agent.create_turn(
{"role": "user", "content": "I am providing some documents for reference."}
],
documents=[
dict(
content="https://raw.githubusercontent.com/example/doc.rst",
mime_type="text/plain",
)
{
"content": "https://raw.githubusercontent.com/example/doc.rst",
"mime_type": "text/plain",
}
],
session_id=session_id,
)
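# Not part of this diff: a sketch of the glue between the agent config above and
# the create_turn call, assuming the Agent(client, agent_config) constructor and
# create_session() helper imported above behave as in the library's standard
# examples; the session name is illustrative.
agent = Agent(client, agent_config)
session_id = agent.create_session("rag-session")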


@@ -1,7 +1,7 @@
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Dell Distribution of Llama Stack


@@ -1,7 +1,7 @@
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Fireworks Distribution
```{toctree}


@@ -1,7 +1,7 @@
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Meta Reference Distribution
```{toctree}


@@ -1,7 +1,7 @@
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Meta Reference Quantized Distribution
```{toctree}


@@ -1,7 +1,7 @@
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Ollama Distribution
```{toctree}


@@ -1,7 +1,7 @@
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Remote vLLM Distribution
```{toctree}
:maxdepth: 2


@@ -1,7 +1,7 @@
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# SambaNova Distribution
```{toctree}


@@ -1,7 +1,7 @@
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# TGI Distribution


@@ -1,7 +1,7 @@
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Together Distribution
```{toctree}


@@ -29,7 +29,7 @@ def find_template_dirs(templates_dir: Path) -> Iterator[Path]:
if not templates_dir.exists():
raise FileNotFoundError(f"Templates directory not found: {templates_dir}")
return (d for d in templates_dir.iterdir() if d.is_dir() and d.name != "__pycache__")
return sorted(d for d in templates_dir.iterdir() if d.is_dir() and d.name != "__pycache__")
def process_template(template_dir: Path, progress) -> None:


@@ -115,3 +115,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -117,3 +117,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -116,3 +116,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -107,3 +107,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -172,3 +172,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -161,3 +161,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -124,3 +124,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -114,3 +114,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -124,3 +124,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -114,3 +114,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -126,3 +126,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -115,3 +115,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -117,3 +117,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -147,3 +147,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -16,7 +16,7 @@ The `llamastack/distribution-{{ name }}` distribution consists of the following
You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.
{%- if run_config_env_vars %}
{% if run_config_env_vars %}
### Environment Variables
The following environment variables can be configured:


@@ -121,3 +121,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -110,3 +110,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -126,3 +126,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -115,3 +115,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -126,3 +126,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -131,8 +131,15 @@ class DistributionTemplate(BaseModel):
providers_str = ", ".join(f"`{p}`" for p in providers)
providers_table += f"| {api} | {providers_str} |\n"
template = "<!-- This file was auto-generated by distro_codegen.py, please edit source -->\n"
template += self.template_path.read_text()
template = self.template_path.read_text()
comment = "<!-- This file was auto-generated by distro_codegen.py, please edit source -->\n"
orphantext = "---\norphan: true\n---\n"
if template.startswith(orphantext):
template = template.replace(orphantext, orphantext + comment)
else:
template = comment + template
# Render template with rich-generated table
env = jinja2.Environment(
trim_blocks=True,


@@ -114,3 +114,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -113,3 +113,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -167,3 +167,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -156,3 +156,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321


@@ -117,3 +117,5 @@ tool_groups:
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

uv.lock

@@ -687,7 +687,7 @@ wheels = [
[[package]]
name = "llama-models"
version = "0.1.1"
version = "0.1.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "jinja2" },
@@ -696,14 +696,14 @@ dependencies = [
{ name = "pyyaml" },
{ name = "tiktoken" },
]
sdist = { url = "https://files.pythonhosted.org/packages/df/80/4a4595cf5e55f71c0e15b85ff2f4c04b0742bf664ede062a09c9d383bf7b/llama_models-0.1.1.tar.gz", hash = "sha256:7cb5a9fe38485b47aff4c93e183d6d390a676a7619f3355502576b652f17733a", size = 1608412 }
sdist = { url = "https://files.pythonhosted.org/packages/b5/f2/ed8310d4677cd38ab45ffba45aea2a4e9882b640045ad9c3198ac69e5a85/llama_models-0.1.2.tar.gz", hash = "sha256:1266eaec7a8db336e4ed034d2b494189ccb7fd6d6b7aefe874eee749a4340b9b", size = 1608069 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d9/93/d49dd0f0cd37df1a7a7fb25444d010f626cdf42b21eea11d839b0f6a808a/llama_models-0.1.1-py3-none-any.whl", hash = "sha256:7e4f15dc4f6f011852ea2c42f9770b75140f5eca670b32cc67fc0a4605c55f89", size = 1650981 },
{ url = "https://files.pythonhosted.org/packages/55/a7/34b9e88ef4109759c8881f43b8006139e3d13d54c440b8c571b253655f54/llama_models-0.1.2-py3-none-any.whl", hash = "sha256:8aa5287d1c6325698991ff677e71148cac347e07493bb5b3ab891e614b89e1f8", size = 1651273 },
]
[[package]]
name = "llama-stack"
version = "0.1.1"
version = "0.1.2"
source = { editable = "." }
dependencies = [
{ name = "blobfile" },
@@ -751,8 +751,8 @@ requires-dist = [
{ name = "fire" },
{ name = "httpx" },
{ name = "huggingface-hub" },
{ name = "llama-models", specifier = ">=0.1.1" },
{ name = "llama-stack-client", specifier = ">=0.1.1" },
{ name = "llama-models", specifier = ">=0.1.2" },
{ name = "llama-stack-client", specifier = ">=0.1.2" },
{ name = "myst-parser", marker = "extra == 'docs'" },
{ name = "nbval", marker = "extra == 'dev'" },
{ name = "pre-commit", marker = "extra == 'dev'" },
@@ -780,7 +780,7 @@ requires-dist = [
[[package]]
name = "llama-stack-client"
version = "0.1.1"
version = "0.1.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@@ -797,9 +797,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/07/42/7004958ac1a6da9a8060decf0d9120fdeb3b2775de090a0a473f2ee4a27d/llama_stack_client-0.1.1.tar.gz", hash = "sha256:3e549a848ade959d342fa52ec49b1913b7bb615a77b5b8dcaefe6ff94409049e", size = 179729 }
sdist = { url = "https://files.pythonhosted.org/packages/9e/75/8b41a3026c871a8650cd8d2cfda9f891a9163458813574f36518bb40afe4/llama_stack_client-0.1.2.tar.gz", hash = "sha256:94277ddae52be557d771dcdc15d85af9012b5aa87439dd69ec1dc0ff486b0c8e", size = 188023 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/80/66/5255c09dc001ff437fd6fe6fad27142035b60073df243f7df0494095f605/llama_stack_client-0.1.1-py3-none-any.whl", hash = "sha256:e07d58fdcc1eaa370dd00b94c2dd1a8169c0ac60c37f6f2772cbc2c5b63f2e62", size = 348665 },
{ url = "https://files.pythonhosted.org/packages/c4/32/3a3a97eecff1f1e3a1dc90e9b00681abea11ec4f43a7ca549981261e18b6/llama_stack_client-0.1.2-py3-none-any.whl", hash = "sha256:85ff0fb57a62d7d0470cfaa2b07a595c9fb3483297944d5e5a066db850d38ccd", size = 359415 },
]
[[package]]