mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 04:04:14 +00:00
# What does this PR do? use a logger * update the distro to add the Files API otherwise it won't start since it is a dependency of vector * clarify project_id and api_key requirements * disable openai compatible calls since the endpoint returns 404 * disable text_inference structured format tests * fixed openai client initialization ## Test Plan Execute text_inference: ``` WATSONX_API_KEY=... WATSONX_PROJECT_ID=... python -m llama_stack.core.server.server llama_stack/distributions/watsonx/run.yaml LLAMA_STACK_CONFIG=http://localhost:8321 uv run --group test pytest -vvvv -ra --text-model watsonx/meta-llama/llama-3-3-70b-instruct tests/integration/inference/test_text_inference.py ============================================= test session starts ============================================== platform darwin -- Python 3.12.8, pytest-8.4.2, pluggy-1.6.0 -- /Users/leseb/Documents/AI/llama-stack/.venv/bin/python3 cachedir: .pytest_cache metadata: {'Python': '3.12.8', 'Platform': 'macOS-15.6.1-arm64-arm-64bit', 'Packages': {'pytest': '8.4.2', 'pluggy': '1.6.0'}, 'Plugins': {'anyio': '4.9.0', 'html': '4.1.1', 'socket': '0.7.0', 'asyncio': '1.1.0', 'json-report': '1.5.0', 'timeout': '2.4.0', 'metadata': '3.1.1', 'cov': '6.2.1', 'nbval': '0.11.0', 'hydra-core': '1.3.2'}} rootdir: /Users/leseb/Documents/AI/llama-stack configfile: pyproject.toml plugins: anyio-4.9.0, html-4.1.1, socket-0.7.0, asyncio-1.1.0, json-report-1.5.0, timeout-2.4.0, metadata-3.1.1, cov-6.2.1, nbval-0.11.0, hydra-core-1.3.2 asyncio: mode=Mode.AUTO, asyncio_default_fixture_loop_scope=None, asyncio_default_test_loop_scope=function collected 20 items tests/integration/inference/test_text_inference.py::test_text_completion_non_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:sanity] PASSED [ 5%] tests/integration/inference/test_text_inference.py::test_text_completion_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:sanity] PASSED [ 10%] 
tests/integration/inference/test_text_inference.py::test_text_completion_stop_sequence[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:stop_sequence] XFAIL [ 15%] tests/integration/inference/test_text_inference.py::test_text_completion_log_probs_non_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:log_probs] XFAIL [ 20%] tests/integration/inference/test_text_inference.py::test_text_completion_log_probs_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:log_probs] XFAIL [ 25%] tests/integration/inference/test_text_inference.py::test_text_completion_structured_output[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:structured_output] SKIPPED structured output) [ 30%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_non_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:non_streaming_01] PASSED [ 35%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:streaming_01] PASSED [ 40%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_tool_calling_and_non_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_calling] PASSED [ 45%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_tool_calling_and_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_calling] PASSED [ 50%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_tool_choice_required[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_calling] PASSED [ 55%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_tool_choice_none[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_calling] PASSED [ 60%] 
tests/integration/inference/test_text_inference.py::test_text_chat_completion_structured_output[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:structured_output] SKIPPEDstructured output) [ 65%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_tool_calling_tools_not_in_request[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_calling_tools_absent-True] PASSED [ 70%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_multi_turn_tool_calling[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:text_then_tool] XFAIL [ 75%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_non_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:non_streaming_02] PASSED [ 80%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:streaming_02] PASSED [ 85%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_tool_calling_tools_not_in_request[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_calling_tools_absent-False] PASSED [ 90%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_multi_turn_tool_calling[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_then_answer] XFAIL [ 95%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_multi_turn_tool_calling[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:array_parameter] XFAIL [100%] =========================================== short test summary info ============================================ SKIPPED [2] tests/integration/inference/test_text_inference.py:49: Model watsonx/meta-llama/llama-3-3-70b-instruct hosted by remote::watsonx doesn't support json_schema structured output XFAIL 
tests/integration/inference/test_text_inference.py::test_text_completion_stop_sequence[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:stop_sequence] - remote::watsonx doesn't support 'stop' parameter yet XFAIL tests/integration/inference/test_text_inference.py::test_text_completion_log_probs_non_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:log_probs] - remote::watsonx doesn't support log probs yet XFAIL tests/integration/inference/test_text_inference.py::test_text_completion_log_probs_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:log_probs] - remote::watsonx doesn't support log probs yet XFAIL tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_multi_turn_tool_calling[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:text_then_tool] - Not tested for non-llama4 models yet XFAIL tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_multi_turn_tool_calling[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_then_answer] - Not tested for non-llama4 models yet XFAIL tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_multi_turn_tool_calling[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:array_parameter] - Not tested for non-llama4 models yet ============================ 12 passed, 2 skipped, 6 xfailed, 14 warnings in 36.88s ============================ ``` --------- Signed-off-by: Sébastien Han <seb@redhat.com>
219 lines
6.5 KiB
YAML
219 lines
6.5 KiB
YAML
# Llama Stack distribution run configuration for IBM watsonx.ai.
# Reconstructed into valid YAML: the scraped copy had a `|` gutter line after
# every real line and had lost all indentation.
version: 2
image_name: watsonx
# APIs exposed by this distribution. `files` is required because it is a
# dependency of the vector_io API (the server fails to start without it).
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- telemetry
- tool_runtime
- vector_io
- files
providers:
  inference:
  - provider_id: watsonx
    provider_type: remote::watsonx
    config:
      url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
      # api_key and project_id are required at runtime; defaults are empty so
      # the config loads without them but inference calls will need them set.
      api_key: ${env.WATSONX_API_KEY:=}
      project_id: ${env.WATSONX_PROJECT_ID:=}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config:
      excluded_categories: []
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/agents_store.db
      responses_store:
        type: sqlite
        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/responses_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      # Default service name is a zero-width space placeholder (\u200B).
      service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
      sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
      sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/trace_store.db
      otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      kvstore:
        type: sqlite
        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/meta_reference_eval.db
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
    config:
      kvstore:
        type: sqlite
        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/huggingface_datasetio.db
  - provider_id: localfs
    provider_type: inline::localfs
    config:
      kvstore:
        type: sqlite
        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/localfs_datasetio.db
  scoring:
  - provider_id: basic
    provider_type: inline::basic
  - provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
  - provider_id: braintrust
    provider_type: inline::braintrust
    config:
      openai_api_key: ${env.OPENAI_API_KEY:=}
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
      max_results: 3
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
  files:
  - provider_id: meta-reference-files
    provider_type: inline::localfs
    config:
      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/watsonx/files}
      metadata_store:
        type: sqlite
        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/files_metadata.db
# Top-level stores used by the stack itself (distinct from provider kvstores).
metadata_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/registry.db
inference_store:
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/inference_store.db
# Model registry: each watsonx model is listed twice — once under the
# provider's lowercase id and once under the canonical Meta model name —
# both mapping to the same provider_model_id.
models:
- metadata: {}
  model_id: meta-llama/llama-3-3-70b-instruct
  provider_id: watsonx
  provider_model_id: meta-llama/llama-3-3-70b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.3-70B-Instruct
  provider_id: watsonx
  provider_model_id: meta-llama/llama-3-3-70b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/llama-2-13b-chat
  provider_id: watsonx
  provider_model_id: meta-llama/llama-2-13b-chat
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-2-13b
  provider_id: watsonx
  provider_model_id: meta-llama/llama-2-13b-chat
  model_type: llm
- metadata: {}
  model_id: meta-llama/llama-3-1-70b-instruct
  provider_id: watsonx
  provider_model_id: meta-llama/llama-3-1-70b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.1-70B-Instruct
  provider_id: watsonx
  provider_model_id: meta-llama/llama-3-1-70b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/llama-3-1-8b-instruct
  provider_id: watsonx
  provider_model_id: meta-llama/llama-3-1-8b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.1-8B-Instruct
  provider_id: watsonx
  provider_model_id: meta-llama/llama-3-1-8b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/llama-3-2-11b-vision-instruct
  provider_id: watsonx
  provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
  provider_id: watsonx
  provider_model_id: meta-llama/llama-3-2-11b-vision-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/llama-3-2-1b-instruct
  provider_id: watsonx
  provider_model_id: meta-llama/llama-3-2-1b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.2-1B-Instruct
  provider_id: watsonx
  provider_model_id: meta-llama/llama-3-2-1b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/llama-3-2-3b-instruct
  provider_id: watsonx
  provider_model_id: meta-llama/llama-3-2-3b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.2-3B-Instruct
  provider_id: watsonx
  provider_model_id: meta-llama/llama-3-2-3b-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/llama-3-2-90b-vision-instruct
  provider_id: watsonx
  provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
  provider_id: watsonx
  provider_model_id: meta-llama/llama-3-2-90b-vision-instruct
  model_type: llm
- metadata: {}
  model_id: meta-llama/llama-guard-3-11b-vision
  provider_id: watsonx
  provider_model_id: meta-llama/llama-guard-3-11b-vision
  model_type: llm
- metadata: {}
  model_id: meta-llama/Llama-Guard-3-11B-Vision
  provider_id: watsonx
  provider_model_id: meta-llama/llama-guard-3-11b-vision
  model_type: llm
- metadata:
    embedding_dimension: 384
  model_id: all-MiniLM-L6-v2
  provider_id: sentence-transformers
  model_type: embedding
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
  provider_id: tavily-search
- toolgroup_id: builtin::rag
  provider_id: rag-runtime
server:
  port: 8321