fix: passthrough provider template + fix (#1612)

# What does this PR do?

- Fix issue w/ passthrough provider


[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan
llama stack run

[//]: # (## Documentation)
This commit is contained in:
Xi Yan 2025-03-13 09:44:26 -07:00 committed by GitHub
parent d072b5fa0c
commit 9617468d13
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 506 additions and 7 deletions

View file

@ -487,6 +487,40 @@
"transformers",
"uvicorn"
],
"passthrough": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch torchvision --index-url https://download.pytorch.org/whl/cpu"
],
"remote-vllm": [
"aiosqlite",
"autoevals",

View file

@ -0,0 +1,42 @@
---
orphan: true
---
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
# Passthrough Distribution
```{toctree}
:maxdepth: 2
:hidden:
self
```
The `llamastack/distribution-passthrough` distribution consists of the following provider configurations.
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::passthrough`, `inline::sentence-transformers` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `remote::wolfram-alpha`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` |
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
### Environment Variables
The following environment variables can be configured:
- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
- `PASSTHROUGH_API_KEY`: Passthrough API Key (default: ``)
- `PASSTHROUGH_URL`: Passthrough URL (default: ``)
### Models
The following models are available by default:
- `llama3.1-8b-instruct `
- `llama3.2-11b-vision-instruct `

View file

@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .passthrough import get_distribution_template # noqa: F401

View file

@ -1,9 +1,10 @@
version: '2'
distribution_spec:
description: Use for running LLM inference with the endpoint that compatible with Llama Stack API
description: Use Passthrough hosted llama-stack endpoint for LLM inference
providers:
inference:
- remote::passthrough
- inline::sentence-transformers
vector_io:
- inline::faiss
- remote::chromadb
@ -26,6 +27,7 @@ distribution_spec:
tool_runtime:
- remote::brave-search
- remote::tavily-search
- remote::wolfram-alpha
- inline::code-interpreter
- inline::rag-runtime
- remote::model-context-protocol

View file

@ -0,0 +1,35 @@
---
orphan: true
---
# Passthrough Distribution
```{toctree}
:maxdepth: 2
:hidden:
self
```
The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.
{{ providers_table }}
{% if run_config_env_vars %}
### Environment Variables
The following environment variables can be configured:
{% for var, (default_value, description) in run_config_env_vars.items() %}
- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
{% endfor %}
{% endif %}
{% if default_models %}
### Models
The following models are available by default:
{% for model in default_models %}
- `{{ model.model_id }} {{ model.doc_string }}`
{% endfor %}
{% endif %}

View file

@ -0,0 +1,201 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from pathlib import Path
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.remote.inference.passthrough.config import (
PassthroughImplConfig,
)
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.templates.template import (
DistributionTemplate,
RunConfigSettings,
)
def get_distribution_template() -> DistributionTemplate:
providers = {
"inference": ["remote::passthrough", "inline::sentence-transformers"],
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"remote::wolfram-alpha",
"inline::code-interpreter",
"inline::rag-runtime",
"remote::model-context-protocol",
],
}
name = "passthrough"
inference_provider = Provider(
provider_id="passthrough",
provider_type="remote::passthrough",
config=PassthroughImplConfig.sample_run_config(),
)
embedding_provider = Provider(
provider_id="sentence-transformers",
provider_type="inline::sentence-transformers",
config=SentenceTransformersInferenceConfig.sample_run_config(),
)
vector_io_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
)
default_models = [
ModelInput(
metadata={},
model_id="meta-llama/Llama-3.1-8B-Instruct",
provider_id="passthrough",
provider_model_id="llama3.1-8b-instruct",
model_type=ModelType.llm,
),
ModelInput(
metadata={},
model_id="meta-llama/Llama-3.2-11B-Vision-Instruct",
provider_id="passthrough",
provider_model_id="llama3.2-11b-vision-instruct",
model_type=ModelType.llm,
),
]
embedding_model = ModelInput(
model_id="all-MiniLM-L6-v2",
provider_id="sentence-transformers",
model_type=ModelType.embedding,
metadata={
"embedding_dimension": 384,
},
)
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::wolfram_alpha",
provider_id="wolfram-alpha",
),
ToolGroupInput(
toolgroup_id="builtin::rag",
provider_id="rag-runtime",
),
ToolGroupInput(
toolgroup_id="builtin::code_interpreter",
provider_id="code-interpreter",
),
]
return DistributionTemplate(
name=name,
distro_type="self_hosted",
description="Use Passthrough hosted llama-stack endpoint for LLM inference",
container_image=None,
template_path=Path(__file__).parent / "doc_template.md",
providers=providers,
available_models_by_provider={
"passthrough": [
ProviderModelEntry(
provider_model_id="llama3.1-8b-instruct",
model_type=ModelType.llm,
),
ProviderModelEntry(
provider_model_id="llama3.2-11b-vision-instruct",
model_type=ModelType.llm,
),
],
},
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider, embedding_provider],
"vector_io": [vector_io_provider],
},
default_models=default_models + [embedding_model],
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
default_tool_groups=default_tool_groups,
),
"run-with-safety.yaml": RunConfigSettings(
provider_overrides={
"inference": [
inference_provider,
embedding_provider,
],
"vector_io": [vector_io_provider],
"safety": [
Provider(
provider_id="llama-guard",
provider_type="inline::llama-guard",
config={},
),
Provider(
provider_id="llama-guard-vision",
provider_type="inline::llama-guard",
config={},
),
Provider(
provider_id="code-scanner",
provider_type="inline::code-scanner",
config={},
),
],
},
default_models=[
*default_models,
embedding_model,
],
default_shields=[
ShieldInput(
shield_id="meta-llama/Llama-Guard-3-8B",
provider_id="llama-guard",
),
ShieldInput(
shield_id="meta-llama/Llama-Guard-3-11B-Vision",
provider_id="llama-guard-vision",
),
ShieldInput(
shield_id="CodeScanner",
provider_id="code-scanner",
),
],
default_tool_groups=default_tool_groups,
),
},
run_config_env_vars={
"LLAMA_STACK_PORT": (
"5001",
"Port for the Llama Stack distribution server",
),
"PASSTHROUGH_API_KEY": (
"",
"Passthrough API Key",
),
"PASSTHROUGH_URL": (
"",
"Passthrough URL",
),
},
)

View file

@ -0,0 +1,154 @@
version: '2'
image_name: passthrough
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
providers:
inference:
- provider_id: passthrough
provider_type: remote::passthrough
config:
url: ${env.PASSTHROUGH_URL}
api_key: ${env.PASSTHROUGH_API_KEY}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
config: {}
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
- provider_id: llama-guard-vision
provider_type: inline::llama-guard
config: {}
- provider_id: code-scanner
provider_type: inline::code-scanner
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/passthrough/trace_store.db}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/localfs_datasetio.db
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: wolfram-alpha
provider_type: remote::wolfram-alpha
config:
api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: rag-runtime
provider_type: inline::rag-runtime
config: {}
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db
models:
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
provider_id: passthrough
provider_model_id: llama3.1-8b-instruct
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
provider_id: passthrough
provider_model_id: llama3.2-11b-vision-instruct
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
provider_id: llama-guard
- shield_id: meta-llama/Llama-Guard-3-11B-Vision
provider_id: llama-guard-vision
- shield_id: CodeScanner
provider_id: code-scanner
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::wolfram_alpha
provider_id: wolfram-alpha
- toolgroup_id: builtin::rag
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter
server:
port: 8321

View file

@ -31,7 +31,8 @@ providers:
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
config:
excluded_categories: []
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
@ -50,14 +51,26 @@ providers:
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/meta_reference_eval.db
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/huggingface_datasetio.db
- provider_id: localfs
provider_type: inline::localfs
config: {}
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/localfs_datasetio.db
scoring:
- provider_id: basic
provider_type: inline::basic
@ -80,6 +93,10 @@ providers:
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: wolfram-alpha
provider_type: remote::wolfram-alpha
config:
api_key: ${env.WOLFRAM_ALPHA_API_KEY:}
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
@ -91,7 +108,7 @@ providers:
config: {}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-llama}/registry.db
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/passthrough}/registry.db
models:
- metadata: {}
model_id: meta-llama/Llama-3.1-8B-Instruct
@ -103,15 +120,22 @@ models:
provider_id: passthrough
provider_model_id: llama3.2-11b-vision-instruct
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
datasets: []
scoring_fns: []
eval_tasks: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::wolfram_alpha
provider_id: wolfram-alpha
- toolgroup_id: builtin::rag
provider_id: rag-runtime
- toolgroup_id: builtin::code_interpreter