mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 12:07:34 +00:00
# What does this PR do? use a logger * update the distro to add the Files API otherwise it won't start since it is a dependency of vector * clarify project_id and api_key requirements * disable openai compatible calls since the endpoint returns 404 * disable text_inference structured format tests * fixed openai client initialization ## Test Plan Execute text_inference: ``` WATSONX_API_KEY=... WATSONX_PROJECT_ID=... python -m llama_stack.core.server.server llama_stack/distributions/watsonx/run.yaml LLAMA_STACK_CONFIG=http://localhost:8321 uv run --group test pytest -vvvv -ra --text-model watsonx/meta-llama/llama-3-3-70b-instruct tests/integration/inference/test_text_inference.py ============================================= test session starts ============================================== platform darwin -- Python 3.12.8, pytest-8.4.2, pluggy-1.6.0 -- /Users/leseb/Documents/AI/llama-stack/.venv/bin/python3 cachedir: .pytest_cache metadata: {'Python': '3.12.8', 'Platform': 'macOS-15.6.1-arm64-arm-64bit', 'Packages': {'pytest': '8.4.2', 'pluggy': '1.6.0'}, 'Plugins': {'anyio': '4.9.0', 'html': '4.1.1', 'socket': '0.7.0', 'asyncio': '1.1.0', 'json-report': '1.5.0', 'timeout': '2.4.0', 'metadata': '3.1.1', 'cov': '6.2.1', 'nbval': '0.11.0', 'hydra-core': '1.3.2'}} rootdir: /Users/leseb/Documents/AI/llama-stack configfile: pyproject.toml plugins: anyio-4.9.0, html-4.1.1, socket-0.7.0, asyncio-1.1.0, json-report-1.5.0, timeout-2.4.0, metadata-3.1.1, cov-6.2.1, nbval-0.11.0, hydra-core-1.3.2 asyncio: mode=Mode.AUTO, asyncio_default_fixture_loop_scope=None, asyncio_default_test_loop_scope=function collected 20 items tests/integration/inference/test_text_inference.py::test_text_completion_non_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:sanity] PASSED [ 5%] tests/integration/inference/test_text_inference.py::test_text_completion_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:sanity] PASSED [ 10%] tests/integration/inference/test_text_inference.py::test_text_completion_stop_sequence[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:stop_sequence] XFAIL [ 15%] tests/integration/inference/test_text_inference.py::test_text_completion_log_probs_non_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:log_probs] XFAIL [ 20%] tests/integration/inference/test_text_inference.py::test_text_completion_log_probs_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:log_probs] XFAIL [ 25%] tests/integration/inference/test_text_inference.py::test_text_completion_structured_output[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:structured_output] SKIPPED structured output) [ 30%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_non_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:non_streaming_01] PASSED [ 35%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:streaming_01] PASSED [ 40%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_tool_calling_and_non_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_calling] PASSED [ 45%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_tool_calling_and_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_calling] PASSED [ 50%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_tool_choice_required[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_calling] PASSED [ 55%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_tool_choice_none[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_calling] PASSED [ 60%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_structured_output[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:structured_output] SKIPPEDstructured output) [ 65%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_tool_calling_tools_not_in_request[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_calling_tools_absent-True] PASSED [ 70%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_multi_turn_tool_calling[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:text_then_tool] XFAIL [ 75%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_non_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:non_streaming_02] PASSED [ 80%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:streaming_02] PASSED [ 85%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_tool_calling_tools_not_in_request[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_calling_tools_absent-False] PASSED [ 90%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_multi_turn_tool_calling[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_then_answer] XFAIL [ 95%] tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_multi_turn_tool_calling[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:array_parameter] XFAIL [100%] =========================================== short test summary info ============================================ SKIPPED [2] tests/integration/inference/test_text_inference.py:49: Model watsonx/meta-llama/llama-3-3-70b-instruct hosted by remote::watsonx doesn't support json_schema structured output XFAIL tests/integration/inference/test_text_inference.py::test_text_completion_stop_sequence[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:stop_sequence] - remote::watsonx doesn't support 'stop' parameter yet XFAIL tests/integration/inference/test_text_inference.py::test_text_completion_log_probs_non_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:log_probs] - remote::watsonx doesn't support log probs yet XFAIL tests/integration/inference/test_text_inference.py::test_text_completion_log_probs_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:log_probs] - remote::watsonx doesn't support log probs yet XFAIL tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_multi_turn_tool_calling[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:text_then_tool] - Not tested for non-llama4 models yet XFAIL tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_multi_turn_tool_calling[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_then_answer] - Not tested for non-llama4 models yet XFAIL tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_multi_turn_tool_calling[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:array_parameter] - Not tested for non-llama4 models yet ============================ 12 passed, 2 skipped, 6 xfailed, 14 warnings in 36.88s ============================ ``` --------- Signed-off-by: Sébastien Han <seb@redhat.com>
122 lines
4.5 KiB
Python
122 lines
4.5 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
from pathlib import Path
|
|
|
|
from llama_stack.apis.models import ModelType
|
|
from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ToolGroupInput
|
|
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings, get_model_registry
|
|
from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
|
|
from llama_stack.providers.inline.inference.sentence_transformers import (
|
|
SentenceTransformersInferenceConfig,
|
|
)
|
|
from llama_stack.providers.remote.inference.watsonx import WatsonXConfig
|
|
from llama_stack.providers.remote.inference.watsonx.models import MODEL_ENTRIES
|
|
|
|
|
|
def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
|
|
providers = {
|
|
"inference": [
|
|
BuildProvider(provider_type="remote::watsonx"),
|
|
BuildProvider(provider_type="inline::sentence-transformers"),
|
|
],
|
|
"vector_io": [BuildProvider(provider_type="inline::faiss")],
|
|
"safety": [BuildProvider(provider_type="inline::llama-guard")],
|
|
"agents": [BuildProvider(provider_type="inline::meta-reference")],
|
|
"telemetry": [BuildProvider(provider_type="inline::meta-reference")],
|
|
"eval": [BuildProvider(provider_type="inline::meta-reference")],
|
|
"datasetio": [
|
|
BuildProvider(provider_type="remote::huggingface"),
|
|
BuildProvider(provider_type="inline::localfs"),
|
|
],
|
|
"scoring": [
|
|
BuildProvider(provider_type="inline::basic"),
|
|
BuildProvider(provider_type="inline::llm-as-judge"),
|
|
BuildProvider(provider_type="inline::braintrust"),
|
|
],
|
|
"tool_runtime": [
|
|
BuildProvider(provider_type="remote::brave-search"),
|
|
BuildProvider(provider_type="remote::tavily-search"),
|
|
BuildProvider(provider_type="inline::rag-runtime"),
|
|
BuildProvider(provider_type="remote::model-context-protocol"),
|
|
],
|
|
"files": [BuildProvider(provider_type="inline::localfs")],
|
|
}
|
|
|
|
inference_provider = Provider(
|
|
provider_id="watsonx",
|
|
provider_type="remote::watsonx",
|
|
config=WatsonXConfig.sample_run_config(),
|
|
)
|
|
|
|
embedding_provider = Provider(
|
|
provider_id="sentence-transformers",
|
|
provider_type="inline::sentence-transformers",
|
|
config=SentenceTransformersInferenceConfig.sample_run_config(),
|
|
)
|
|
|
|
available_models = {
|
|
"watsonx": MODEL_ENTRIES,
|
|
}
|
|
default_tool_groups = [
|
|
ToolGroupInput(
|
|
toolgroup_id="builtin::websearch",
|
|
provider_id="tavily-search",
|
|
),
|
|
ToolGroupInput(
|
|
toolgroup_id="builtin::rag",
|
|
provider_id="rag-runtime",
|
|
),
|
|
]
|
|
|
|
embedding_model = ModelInput(
|
|
model_id="all-MiniLM-L6-v2",
|
|
provider_id="sentence-transformers",
|
|
model_type=ModelType.embedding,
|
|
metadata={
|
|
"embedding_dimension": 384,
|
|
},
|
|
)
|
|
|
|
files_provider = Provider(
|
|
provider_id="meta-reference-files",
|
|
provider_type="inline::localfs",
|
|
config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
|
)
|
|
default_models, _ = get_model_registry(available_models)
|
|
return DistributionTemplate(
|
|
name=name,
|
|
distro_type="remote_hosted",
|
|
description="Use watsonx for running LLM inference",
|
|
container_image=None,
|
|
template_path=Path(__file__).parent / "doc_template.md",
|
|
providers=providers,
|
|
available_models_by_provider=available_models,
|
|
run_configs={
|
|
"run.yaml": RunConfigSettings(
|
|
provider_overrides={
|
|
"inference": [inference_provider, embedding_provider],
|
|
"files": [files_provider],
|
|
},
|
|
default_models=default_models + [embedding_model],
|
|
default_tool_groups=default_tool_groups,
|
|
),
|
|
},
|
|
run_config_env_vars={
|
|
"LLAMASTACK_PORT": (
|
|
"5001",
|
|
"Port for the Llama Stack distribution server",
|
|
),
|
|
"WATSONX_API_KEY": (
|
|
"",
|
|
"watsonx API Key",
|
|
),
|
|
"WATSONX_PROJECT_ID": (
|
|
"",
|
|
"watsonx Project ID",
|
|
),
|
|
},
|
|
)
|