Merge 5c04d2e0a7 into sapling-pr-archive-ehhuang

Authored by ehhuang on 2025-10-23 12:27:37 -07:00; committed by GitHub.
commit fbd68f9588
8 changed files with 189 additions and 129 deletions


@@ -61,7 +61,7 @@ jobs:
           && fromJSON('[{"setup": "vllm", "suite": "base"}]')
           || github.event.inputs.test-setup == 'ollama-vision'
           && fromJSON('[{"setup": "ollama-vision", "suite": "vision"}]')
-          || fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}]')
+          || fromJSON('[{"setup": "ollama", "suite": "base"}, {"setup": "ollama-vision", "suite": "vision"}, {"setup": "gpt", "suite": "responses"}]')
         }}
     steps:
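
For context, the default branch of that fromJSON ternary now fans out to three matrix entries. A quick local check (plain Python, not part of the workflow) confirms the literal parses and shows the added gpt/responses leg:

# Sketch only: parse the workflow's default matrix literal and list its entries.
import json

default_matrix = json.loads(
    '[{"setup": "ollama", "suite": "base"}, '
    '{"setup": "ollama-vision", "suite": "vision"}, '
    '{"setup": "gpt", "suite": "responses"}]'
)
for entry in default_matrix:
    print(f"setup={entry['setup']} suite={entry['suite']}")
# setup=ollama suite=base
# setup=ollama-vision suite=vision
# setup=gpt suite=responses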


@@ -27,28 +27,24 @@ providers:
     config:
       storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
       metadata_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+        table_name: files_metadata
+        backend: sql_default
   vector_io:
   - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
     provider_type: remote::chromadb
     config:
       url: ${env.CHROMADB_URL:=}
-      kvstore:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
     config:
       storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
       metadata_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+        table_name: files_metadata
+        backend: sql_default
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -58,20 +54,15 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      persistence_store:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
-      responses_store:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -112,32 +103,45 @@ storage:
       db: ${env.POSTGRES_DB:=llamastack}
       user: ${env.POSTGRES_USER:=llamastack}
       password: ${env.POSTGRES_PASSWORD:=llamastack}
-  references:
+  stores:
     metadata:
-      backend: kv_default
       namespace: registry
+      backend: kv_default
     inference:
-      backend: sql_default
       table_name: inference_store
-models:
-- metadata:
-    embedding_dimension: 768
-  model_id: nomic-embed-text-v1.5
-  provider_id: sentence-transformers
-  model_type: embedding
-- model_id: ${env.INFERENCE_MODEL}
-  provider_id: vllm-inference
-  model_type: llm
-shields:
-- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
-vector_dbs: []
-datasets: []
-scoring_fns: []
-benchmarks: []
-tool_groups:
-- toolgroup_id: builtin::websearch
-  provider_id: tavily-search
-- toolgroup_id: builtin::rag
-  provider_id: rag-runtime
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models:
+  - metadata:
+      embedding_dimension: 768
+    model_id: nomic-embed-text-v1.5
+    provider_id: sentence-transformers
+    model_type: embedding
+  - model_id: ${env.INFERENCE_MODEL}
+    provider_id: vllm-inference
+    model_type: llm
+  shields:
+  - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
 server:
   port: 8323
+telemetry:
+  enabled: true
+vector_stores:
+  default_provider_id: chromadb
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
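
The config change above replaces per-provider Postgres/SQLite connection blocks with named references into the shared storage backends (kv_default / sql_default), moves registered objects under registered_resources, and adds conversations, telemetry, and vector_stores sections. A rough sanity check along these lines (hypothetical file path; the keys checked are the ones visible in the diff) can confirm a migrated run.yaml uses the new shape:

# Hedged sketch: verify a migrated run.yaml uses the new storage layout.
# "run.yaml" is a placeholder path; requires PyYAML.
import yaml

with open("run.yaml") as f:
    cfg = yaml.safe_load(f)

stores = cfg["storage"]["stores"]
assert stores["metadata"]["backend"] == "kv_default"
assert stores["inference"]["backend"] == "sql_default"
assert "conversations" in stores  # new in this commit

agents_cfg = next(
    p["config"] for p in cfg["providers"]["agents"] if p["provider_id"] == "meta-reference"
)
# Agents now declare *where* state lives (namespace/table plus backend name),
# not *how* to connect to Postgres.
assert agents_cfg["persistence"]["agent_state"]["backend"] == "kv_default"
assert agents_cfg["persistence"]["responses"]["backend"] == "sql_default"
print("run.yaml uses the new storage references")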


@@ -32,21 +32,17 @@ providers:
     provider_type: remote::chromadb
     config:
       url: ${env.CHROMADB_URL:=}
-      kvstore:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
   files:
   - provider_id: meta-reference-files
     provider_type: inline::localfs
     config:
       storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
       metadata_store:
-        type: sqlite
-        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
+        table_name: files_metadata
+        backend: sql_default
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -56,20 +52,15 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      persistence_store:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
-      responses_store:
-        type: postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
@@ -110,40 +101,53 @@ storage:
       db: ${env.POSTGRES_DB:=llamastack}
       user: ${env.POSTGRES_USER:=llamastack}
       password: ${env.POSTGRES_PASSWORD:=llamastack}
-  references:
+  stores:
     metadata:
-      backend: kv_default
       namespace: registry
+      backend: kv_default
     inference:
-      backend: sql_default
       table_name: inference_store
-models:
-- metadata:
-    embedding_dimension: 768
-  model_id: nomic-embed-text-v1.5
-  provider_id: sentence-transformers
-  model_type: embedding
-- metadata: {}
-  model_id: ${env.INFERENCE_MODEL}
-  provider_id: vllm-inference
-  model_type: llm
-- metadata: {}
-  model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
-  provider_id: vllm-safety
-  model_type: llm
-shields:
-- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
-vector_dbs: []
-datasets: []
-scoring_fns: []
-benchmarks: []
-tool_groups:
-- toolgroup_id: builtin::websearch
-  provider_id: tavily-search
-- toolgroup_id: builtin::rag
-  provider_id: rag-runtime
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models:
+  - metadata:
+      embedding_dimension: 768
+    model_id: nomic-embed-text-v1.5
+    provider_id: sentence-transformers
+    model_type: embedding
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
+    provider_id: vllm-inference
+    model_type: llm
+  - metadata: {}
+    model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
+    provider_id: vllm-safety
+    model_type: llm
+  shields:
+  - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
 server:
   port: 8321
   auth:
     provider_config:
       type: github_token
+telemetry:
+  enabled: true
+vector_stores:
+  default_provider_id: chromadb
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5


@@ -174,7 +174,9 @@ class StackApp(FastAPI):
 @asynccontextmanager
 async def lifespan(app: StackApp):
-    logger.info("Starting up")
+    server_version = parse_version("llama-stack")
+    logger.info(f"Starting up Llama Stack server (version: {server_version})")
     assert app.stack is not None
     app.stack.create_registry_refresh_task()
     yield
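
parse_version here is presumably a helper already available in server.py; its definition is not shown in this hunk. A minimal stand-in that resolves the installed package version (an assumption for illustration, not necessarily the repo's implementation) could look like:

# Hedged sketch of what a parse_version("llama-stack") helper might do.
# The real helper may differ; this resolves the installed distribution version
# and falls back to "unknown" for editable/dev checkouts.
from importlib.metadata import PackageNotFoundError, version


def parse_version(package_name: str) -> str:
    try:
        return version(package_name)
    except PackageNotFoundError:
        return "unknown"


print(parse_version("llama-stack"))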


@@ -16,14 +16,59 @@
 set -Eeuo pipefail

-if command -v podman &> /dev/null; then
-  CONTAINER_RUNTIME="podman"
-elif command -v docker &> /dev/null; then
-  CONTAINER_RUNTIME="docker"
-else
-  echo "🚨 Neither Podman nor Docker could be found"
-  echo "Install Docker: https://docs.docker.com/get-docker/ or Podman: https://podman.io/getting-started/installation"
-  exit 1
+# Parse arguments
+CONTAINER_RUNTIME=""
+
+print_usage() {
+  echo "Usage: $0 [--container docker|podman]"
+  echo ""
+  echo "Options:"
+  echo "  -c, --container  Choose container runtime (docker or podman)."
+  echo "  -h, --help       Show this help."
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -c|--container)
+      if [[ $# -lt 2 ]]; then
+        echo "🚨 --container requires a value: docker or podman"
+        exit 1
+      fi
+      case "$2" in
+        docker|podman)
+          CONTAINER_RUNTIME="$2"
+          shift 2
+          ;;
+        *)
+          echo "🚨 Invalid container runtime: $2"
+          echo "Valid options are: docker, podman"
+          exit 1
+          ;;
+      esac
+      ;;
+    -h|--help)
+      print_usage
+      exit 0
+      ;;
+    *)
+      echo "🚨 Unknown argument: $1"
+      print_usage
+      exit 1
+      ;;
+  esac
+done
+
+# Detect container runtime if not specified
+if [[ -z "$CONTAINER_RUNTIME" ]]; then
+  if command -v podman &> /dev/null; then
+    CONTAINER_RUNTIME="podman"
+  elif command -v docker &> /dev/null; then
+    CONTAINER_RUNTIME="docker"
+  else
+    echo "🚨 Neither Podman nor Docker could be found"
+    echo "Install Docker: https://docs.docker.com/get-docker/ or Podman: https://podman.io/getting-started/installation"
+    exit 1
+  fi
 fi

 echo "🚀 Setting up telemetry stack for Llama Stack using $CONTAINER_RUNTIME..."


@@ -78,18 +78,18 @@ def data_url_from_file(file_path: str) -> str:
     ],
 )
 def test_register_and_iterrows(llama_stack_client, purpose, source, provider_id, limit):
-    dataset = llama_stack_client.datasets.register(
+    dataset = llama_stack_client.beta.datasets.register(
         purpose=purpose,
         source=source,
     )
     assert dataset.identifier is not None
     assert dataset.provider_id == provider_id
-    iterrow_response = llama_stack_client.datasets.iterrows(dataset.identifier, limit=limit)
+    iterrow_response = llama_stack_client.beta.datasets.iterrows(dataset.identifier, limit=limit)
     assert len(iterrow_response.data) == limit
-    dataset_list = llama_stack_client.datasets.list()
+    dataset_list = llama_stack_client.beta.datasets.list()
     assert dataset.identifier in [d.identifier for d in dataset_list]
-    llama_stack_client.datasets.unregister(dataset.identifier)
-    dataset_list = llama_stack_client.datasets.list()
+    llama_stack_client.beta.datasets.unregister(dataset.identifier)
+    dataset_list = llama_stack_client.beta.datasets.list()
     assert dataset.identifier not in [d.identifier for d in dataset_list]
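
These tests now reach datasets through the client's beta namespace (and, in the eval tests below, benchmarks through alpha). A minimal standalone call against a running stack might look like the following; the base URL and data URI are placeholders, and a server plus the llama-stack-client package are assumed:

# Hedged sketch of the new client namespaces exercised by these tests.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

# Datasets moved from client.datasets.* to client.beta.datasets.*
dataset = client.beta.datasets.register(
    purpose="eval/messages-answer",
    source={"type": "uri", "uri": "data:text/csv;base64,..."},  # placeholder URI
)
rows = client.beta.datasets.iterrows(dataset.identifier, limit=3)
print(len(rows.data))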


@@ -17,17 +17,17 @@ from ..datasets.test_datasets import data_url_from_file
 @pytest.mark.parametrize("scoring_fn_id", ["basic::equality"])
 def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
-    dataset = llama_stack_client.datasets.register(
+    dataset = llama_stack_client.beta.datasets.register(
         purpose="eval/messages-answer",
         source={
             "type": "uri",
             "uri": data_url_from_file(Path(__file__).parent.parent / "datasets" / "test_dataset.csv"),
         },
     )
-    response = llama_stack_client.datasets.list()
+    response = llama_stack_client.beta.datasets.list()
     assert any(x.identifier == dataset.identifier for x in response)
-    rows = llama_stack_client.datasets.iterrows(
+    rows = llama_stack_client.beta.datasets.iterrows(
         dataset_id=dataset.identifier,
         limit=3,
     )
@@ -37,12 +37,12 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
         scoring_fn_id,
     ]
     benchmark_id = str(uuid.uuid4())
-    llama_stack_client.benchmarks.register(
+    llama_stack_client.alpha.benchmarks.register(
         benchmark_id=benchmark_id,
         dataset_id=dataset.identifier,
         scoring_functions=scoring_functions,
     )
-    list_benchmarks = llama_stack_client.benchmarks.list()
+    list_benchmarks = llama_stack_client.alpha.benchmarks.list()
     assert any(x.identifier == benchmark_id for x in list_benchmarks)
     response = llama_stack_client.alpha.eval.evaluate_rows(
@@ -66,7 +66,7 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
 @pytest.mark.parametrize("scoring_fn_id", ["basic::subset_of"])
 def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
-    dataset = llama_stack_client.datasets.register(
+    dataset = llama_stack_client.beta.datasets.register(
         purpose="eval/messages-answer",
         source={
             "type": "uri",
@@ -74,7 +74,7 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
         },
     )
     benchmark_id = str(uuid.uuid4())
-    llama_stack_client.benchmarks.register(
+    llama_stack_client.alpha.benchmarks.register(
         benchmark_id=benchmark_id,
         dataset_id=dataset.identifier,
         scoring_functions=[scoring_fn_id],


@@ -5,6 +5,7 @@
 # the root directory of this source tree.

 import json
+import logging  # allow-direct-logging
 import os

 import httpx
@@ -198,7 +199,7 @@ def test_response_sequential_file_search(
 @pytest.mark.parametrize("case", mcp_tool_test_cases)
-def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case):
+def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, caplog):
     if not isinstance(compat_client, LlamaStackAsLibraryClient):
         pytest.skip("in-process MCP server is only supported in library client")
@@ -245,13 +246,17 @@ def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case):
         if isinstance(compat_client, LlamaStackAsLibraryClient)
         else (httpx.HTTPStatusError, openai.AuthenticationError)
     )
-    with pytest.raises(exc_type):
-        compat_client.responses.create(
-            model=text_model_id,
-            input=case.input,
-            tools=tools,
-            stream=False,
-        )
+    # Suppress expected auth error logs only for the failing auth attempt
+    with caplog.at_level(
+        logging.CRITICAL, logger="llama_stack.providers.inline.agents.meta_reference.responses.streaming"
+    ):
+        with pytest.raises(exc_type):
+            compat_client.responses.create(
+                model=text_model_id,
+                input=case.input,
+                tools=tools,
+                stream=False,
+            )

     for tool in tools:
         if tool["type"] == "mcp":