# What does this PR do?

Adds dynamic model support to TGI, and adds a new `overwrite_completion_id` feature to `OpenAIMixin` to handle TGI always returning `id=""`.

## Test Plan

tgi: `docker run --gpus all --shm-size 1g -p 8080:80 -v /data:/data ghcr.io/huggingface/text-generation-inference --model-id Qwen/Qwen3-0.6B`

stack: `TGI_URL=http://localhost:8080 uv run llama stack build --image-type venv --distro ci-tests --run`

test: `./scripts/integration-tests.sh --stack-config http://localhost:8321 --setup tgi --subdirs inference --pattern openai`
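For context on the `overwrite_completion_id` change: a minimal sketch of how such a switch could work in an OpenAI-compatible mixin. The `OpenAIMixin` and flag names come from this PR; the class body below is an assumption, not the actual implementation.

```python
import uuid


class OpenAIMixinSketch:
    """Hypothetical stand-in for OpenAIMixin (assumed shape)."""

    # Providers whose backend returns an unusable completion id set this
    # to True -- TGI always returns id="".
    overwrite_completion_id: bool = False

    def _fixup_completion_id(self, completion: dict) -> dict:
        # Replace the backend-provided id with a locally generated,
        # unique, non-empty one.
        if self.overwrite_completion_id or not completion.get("id"):
            completion["id"] = f"chatcmpl-{uuid.uuid4().hex}"
        return completion
```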
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

# Central definition of integration test suites. You can use these suites by passing --suite=name to pytest.
# For example:
#
# ```bash
# pytest tests/integration/ --suite=vision --setup=ollama
# ```
#
"""
Each suite defines what to run (roots). Suites can be run with different global setups defined in setups.py.
Setups provide environment variables and model defaults that can be reused across multiple suites.

CLI examples:
    pytest tests/integration --suite=responses --setup=gpt
    pytest tests/integration --suite=vision --setup=ollama
    pytest tests/integration --suite=base --setup=vllm
"""

from pathlib import Path

from pydantic import BaseModel, Field

this_dir = Path(__file__).parent


class Suite(BaseModel):
    name: str
    roots: list[str]
    default_setup: str | None = None


class Setup(BaseModel):
    """A reusable test configuration with environment and CLI defaults."""

    name: str
    description: str
    defaults: dict[str, str] = Field(default_factory=dict)
    env: dict[str, str] = Field(default_factory=dict)


# Global setups - can be used with any suite "technically" but in reality, some setups might work
# only for specific test suites.
SETUP_DEFINITIONS: dict[str, Setup] = {
    "ollama": Setup(
        name="ollama",
        description="Local Ollama provider with text + safety models",
        env={
            "OLLAMA_URL": "http://0.0.0.0:11434",
            "SAFETY_MODEL": "ollama/llama-guard3:1b",
        },
        defaults={
            "text_model": "ollama/llama3.2:3b-instruct-fp16",
            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
            "safety_model": "ollama/llama-guard3:1b",
            "safety_shield": "llama-guard",
        },
    ),
    "ollama-vision": Setup(
        name="ollama",
        description="Local Ollama provider with a vision model",
        env={
            "OLLAMA_URL": "http://0.0.0.0:11434",
        },
        defaults={
            "vision_model": "ollama/llama3.2-vision:11b",
            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
        },
    ),
    "vllm": Setup(
        name="vllm",
        description="vLLM provider with a text model",
        env={
            "VLLM_URL": "http://localhost:8000/v1",
        },
        defaults={
            "text_model": "vllm/meta-llama/Llama-3.2-1B-Instruct",
            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
        },
    ),
    "gpt": Setup(
        name="gpt",
        description="OpenAI GPT models for high-quality responses and tool calling",
        defaults={
            "text_model": "openai/gpt-4o",
            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
        },
    ),
    "tgi": Setup(
        name="tgi",
        description="Text Generation Inference (TGI) provider with a text model",
        env={
            "TGI_URL": "http://localhost:8080",
        },
        defaults={
            "text_model": "tgi/Qwen/Qwen3-0.6B",
        },
    ),
}
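
# Illustrative only: adding a new setup is just another entry in the dict
# above, e.g. (hypothetical provider and values):
#
#     "my-provider": Setup(
#         name="my-provider",
#         description="Example provider with a text model",
#         env={"MY_PROVIDER_URL": "http://localhost:9000"},
#         defaults={"text_model": "my-provider/some-model"},
#     ),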

# Roots for the base suite: every sibling test directory except helpers and
# directories that are covered separately (e.g. responses has its own suite).
base_roots = [
    str(p)
    for p in this_dir.glob("*")
    if p.is_dir()
    and p.name not in ("__pycache__", "fixtures", "test_cases", "recordings", "responses", "post_training")
]

SUITE_DEFINITIONS: dict[str, Suite] = {
    "base": Suite(
        name="base",
        roots=base_roots,
        default_setup="ollama",
    ),
    "responses": Suite(
        name="responses",
        roots=["tests/integration/responses"],
        default_setup="gpt",
    ),
    "vision": Suite(
        name="vision",
        roots=["tests/integration/inference/test_vision_inference.py"],
        default_setup="ollama-vision",
    ),
}
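

# Hypothetical sketch (assumed, not the actual harness): how a conftest.py
# might resolve --suite/--setup into test roots and environment variables
# using the two dicts above.
import os


def resolve_suite(suite_name: str, setup_name: str | None) -> list[str]:
    suite = SUITE_DEFINITIONS[suite_name]
    chosen = setup_name or suite.default_setup
    if chosen is None:
        raise ValueError(f"suite {suite_name!r} has no default setup; pass --setup")
    setup = SETUP_DEFINITIONS[chosen]
    # Export the setup's env vars (e.g. OLLAMA_URL) without clobbering
    # values the user already exported.
    for key, value in setup.env.items():
        os.environ.setdefault(key, value)
    # setup.defaults would similarly seed CLI options such as --text-model.
    return suite.roots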