Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-04 12:07:34 +00:00)
chore: various watsonx fixes
* use a logger
* update the distro to add the Files API, otherwise it won't start since it is a dependency of vector_io
* clarify project_id and api_key requirements
* disable text_inference structured format tests
* fixed openai client initialization

Test plan: Execute text_inference:

```
WATSONX_API_KEY=... WATSONX_PROJECT_ID=... python -m llama_stack.core.server.server llama_stack/distributions/watsonx/run.yaml

LLAMA_STACK_CONFIG=http://localhost:8321 uv run --group test pytest -vvvv -ra --text-model watsonx/meta-llama/llama-3-3-70b-instruct tests/integration/inference/test_text_inference.py

============================================= test session starts ==============================================
platform darwin -- Python 3.12.8, pytest-8.4.2, pluggy-1.6.0 -- /Users/leseb/Documents/AI/llama-stack/.venv/bin/python3
cachedir: .pytest_cache
metadata: {'Python': '3.12.8', 'Platform': 'macOS-15.6.1-arm64-arm-64bit', 'Packages': {'pytest': '8.4.2', 'pluggy': '1.6.0'}, 'Plugins': {'anyio': '4.9.0', 'html': '4.1.1', 'socket': '0.7.0', 'asyncio': '1.1.0', 'json-report': '1.5.0', 'timeout': '2.4.0', 'metadata': '3.1.1', 'cov': '6.2.1', 'nbval': '0.11.0', 'hydra-core': '1.3.2'}}
rootdir: /Users/leseb/Documents/AI/llama-stack
configfile: pyproject.toml
plugins: anyio-4.9.0, html-4.1.1, socket-0.7.0, asyncio-1.1.0, json-report-1.5.0, timeout-2.4.0, metadata-3.1.1, cov-6.2.1, nbval-0.11.0, hydra-core-1.3.2
asyncio: mode=Mode.AUTO, asyncio_default_fixture_loop_scope=None, asyncio_default_test_loop_scope=function
collected 20 items

tests/integration/inference/test_text_inference.py::test_text_completion_non_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:sanity] PASSED [  5%]
tests/integration/inference/test_text_inference.py::test_text_completion_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:sanity] PASSED [ 10%]
tests/integration/inference/test_text_inference.py::test_text_completion_stop_sequence[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:stop_sequence] XFAIL [ 15%]
tests/integration/inference/test_text_inference.py::test_text_completion_log_probs_non_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:log_probs] XFAIL [ 20%]
tests/integration/inference/test_text_inference.py::test_text_completion_log_probs_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:log_probs] XFAIL [ 25%]
tests/integration/inference/test_text_inference.py::test_text_completion_structured_output[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:structured_output] SKIPPED (structured output) [ 30%]
tests/integration/inference/test_text_inference.py::test_text_chat_completion_non_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:non_streaming_01] PASSED [ 35%]
tests/integration/inference/test_text_inference.py::test_text_chat_completion_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:streaming_01] PASSED [ 40%]
tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_tool_calling_and_non_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_calling] PASSED [ 45%]
tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_tool_calling_and_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_calling] PASSED [ 50%]
tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_tool_choice_required[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_calling] PASSED [ 55%]
tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_tool_choice_none[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_calling] PASSED [ 60%]
tests/integration/inference/test_text_inference.py::test_text_chat_completion_structured_output[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:structured_output] SKIPPED (structured output) [ 65%]
tests/integration/inference/test_text_inference.py::test_text_chat_completion_tool_calling_tools_not_in_request[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_calling_tools_absent-True] PASSED [ 70%]
tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_multi_turn_tool_calling[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:text_then_tool] XFAIL [ 75%]
tests/integration/inference/test_text_inference.py::test_text_chat_completion_non_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:non_streaming_02] PASSED [ 80%]
tests/integration/inference/test_text_inference.py::test_text_chat_completion_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:streaming_02] PASSED [ 85%]
tests/integration/inference/test_text_inference.py::test_text_chat_completion_tool_calling_tools_not_in_request[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_calling_tools_absent-False] PASSED [ 90%]
tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_multi_turn_tool_calling[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_then_answer] XFAIL [ 95%]
tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_multi_turn_tool_calling[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:array_parameter] XFAIL [100%]

=========================================== short test summary info ============================================
SKIPPED [2] tests/integration/inference/test_text_inference.py:49: Model watsonx/meta-llama/llama-3-3-70b-instruct hosted by remote::watsonx doesn't support json_schema structured output
XFAIL tests/integration/inference/test_text_inference.py::test_text_completion_stop_sequence[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:stop_sequence] - remote::watsonx doesn't support 'stop' parameter yet
XFAIL tests/integration/inference/test_text_inference.py::test_text_completion_log_probs_non_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:log_probs] - remote::watsonx doesn't support log probs yet
XFAIL tests/integration/inference/test_text_inference.py::test_text_completion_log_probs_streaming[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:completion:log_probs] - remote::watsonx doesn't support log probs yet
XFAIL tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_multi_turn_tool_calling[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:text_then_tool] - Not tested for non-llama4 models yet
XFAIL tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_multi_turn_tool_calling[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:tool_then_answer] - Not tested for non-llama4 models yet
XFAIL tests/integration/inference/test_text_inference.py::test_text_chat_completion_with_multi_turn_tool_calling[txt=watsonx/meta-llama/llama-3-3-70b-instruct-inference:chat_completion:array_parameter] - Not tested for non-llama4 models yet
============================ 12 passed, 2 skipped, 6 xfailed, 14 warnings in 36.88s ============================
```

Signed-off-by: Sébastien Han <seb@redhat.com>
Parent: f4ab154ade
Commit: ed8b884a71

7 changed files with 46 additions and 9 deletions
Provider documentation (watsonx configuration reference):

```diff
@@ -9,8 +9,8 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `url` | `<class 'str'>` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
-| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key, only needed of using the hosted service |
-| `project_id` | `str \| None` | No | | The Project ID key, only needed of using the hosted service |
+| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key |
+| `project_id` | `str \| None` | No | | The Project ID key |
 | `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
 
 ## Sample Configuration
```
llama_stack/distributions/watsonx/run.yaml:

```diff
@@ -10,6 +10,7 @@ apis:
 - telemetry
 - tool_runtime
 - vector_io
+- files
 providers:
   inference:
   - provider_id: watsonx
@@ -94,6 +95,14 @@ providers:
     provider_type: inline::rag-runtime
   - provider_id: model-context-protocol
     provider_type: remote::model-context-protocol
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/watsonx/files}
+      metadata_store:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/files_metadata.db
 metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/registry.db
```
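Since a missing Files API is exactly what kept the distro from starting, a quick sanity check on the generated run.yaml is to load it and confirm the new entries are present. A minimal sketch, assuming PyYAML is installed and using the run.yaml path from the test plan above:

```python
# Sketch: verify the watsonx run.yaml now declares the Files API and a files provider.
# The path comes from the test plan above; adjust it to your checkout if needed.
import yaml

with open("llama_stack/distributions/watsonx/run.yaml") as f:
    run_config = yaml.safe_load(f)

assert "files" in run_config["apis"], "files API missing from the apis list"
assert "files" in run_config["providers"], "files provider block missing"
print(run_config["providers"]["files"][0]["provider_type"])  # expected: inline::localfs
```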
watsonx distribution template (get_distribution_template):

```diff
@@ -9,6 +9,7 @@ from pathlib import Path
 from llama_stack.apis.models import ModelType
 from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ToolGroupInput
 from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings, get_model_registry
+from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
@@ -16,7 +17,7 @@ from llama_stack.providers.remote.inference.watsonx import WatsonXConfig
 from llama_stack.providers.remote.inference.watsonx.models import MODEL_ENTRIES
 
 
-def get_distribution_template() -> DistributionTemplate:
+def get_distribution_template(name: str = "watsonx") -> DistributionTemplate:
     providers = {
         "inference": [
             BuildProvider(provider_type="remote::watsonx"),
@@ -42,6 +43,7 @@ def get_distribution_template() -> DistributionTemplate:
             BuildProvider(provider_type="inline::rag-runtime"),
             BuildProvider(provider_type="remote::model-context-protocol"),
         ],
+        "files": [BuildProvider(provider_type="inline::localfs")],
     }
 
     inference_provider = Provider(
@@ -79,9 +81,14 @@ def get_distribution_template() -> DistributionTemplate:
         },
     )
 
+    files_provider = Provider(
+        provider_id="meta-reference-files",
+        provider_type="inline::localfs",
+        config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+    )
     default_models, _ = get_model_registry(available_models)
     return DistributionTemplate(
-        name="watsonx",
+        name=name,
         distro_type="remote_hosted",
         description="Use watsonx for running LLM inference",
         container_image=None,
@@ -92,6 +99,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider, embedding_provider],
+                    "files": [files_provider],
                 },
                 default_models=default_models + [embedding_model],
                 default_tool_groups=default_tool_groups,
```
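The new `name` parameter defaults to `"watsonx"`, so existing callers are unaffected while the files storage path now follows the distro name. A small sketch of how this might be exercised (the import path is an assumption based on the usual llama_stack layout; it is not shown in this diff):

```python
# Sketch only: the signature change keeps the old behaviour as the default.
# Module path assumed, not taken from this diff.
from llama_stack.distributions.watsonx.watsonx import get_distribution_template

template = get_distribution_template()  # same as get_distribution_template(name="watsonx")
print(template.name)  # expected: "watsonx"
```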
WatsonXConfig:

```diff
@@ -26,11 +26,11 @@ class WatsonXConfig(BaseModel):
     )
     api_key: SecretStr | None = Field(
         default_factory=lambda: os.getenv("WATSONX_API_KEY"),
-        description="The watsonx API key, only needed of using the hosted service",
+        description="The watsonx API key",
     )
     project_id: str | None = Field(
         default_factory=lambda: os.getenv("WATSONX_PROJECT_ID"),
-        description="The Project ID key, only needed of using the hosted service",
+        description="The Project ID key",
     )
     timeout: int = Field(
         default=60,
```
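For illustration, the env-var-backed defaults above mean the config can be constructed with no explicit arguments once `WATSONX_API_KEY` and `WATSONX_PROJECT_ID` are exported. A reduced sketch of the same pattern (only the fields shown in this hunk, not the full `WatsonXConfig`):

```python
# Reduced sketch of the WatsonXConfig pattern: optional credentials that fall back
# to environment variables via default_factory.
import os

from pydantic import BaseModel, Field, SecretStr


class WatsonXConfigSketch(BaseModel):
    url: str = Field(
        default="https://us-south.ml.cloud.ibm.com",
        description="A base url for accessing the watsonx.ai",
    )
    api_key: SecretStr | None = Field(
        default_factory=lambda: os.getenv("WATSONX_API_KEY"),
        description="The watsonx API key",
    )
    project_id: str | None = Field(
        default_factory=lambda: os.getenv("WATSONX_PROJECT_ID"),
        description="The Project ID key",
    )
    timeout: int = Field(default=60, description="Timeout for the HTTP requests")


# With WATSONX_API_KEY and WATSONX_PROJECT_ID exported, no arguments are required:
config = WatsonXConfigSketch()
```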
watsonx inference adapter (WatsonXInferenceAdapter):

```diff
@@ -38,6 +38,7 @@ from llama_stack.apis.inference import (
     TopKSamplingStrategy,
     TopPSamplingStrategy,
 )
+from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from llama_stack.providers.utils.inference.openai_compat import (
     OpenAICompatCompletionChoice,
@@ -57,14 +58,29 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 from . import WatsonXConfig
 from .models import MODEL_ENTRIES
 
+logger = get_logger(name=__name__, category="inference::watsonx")
+
+
+# Note on structured output
+# WatsonX returns responses with a json embedded into a string.
+# Examples:
+
+# ChatCompletionResponse(completion_message=CompletionMessage(content='```json\n{\n
+# "first_name": "Michael",\n "last_name": "Jordan",\n'...)
+# Not even a valid JSON, but we can still extract the JSON from the content
+
+# CompletionResponse(content=' \nThe best answer is $\\boxed{\\{"name": "Michael Jordan",
+# "year_born": "1963", "year_retired": "2003"\\}}$')
+# Find the start of the boxed content
 
 
 class WatsonXInferenceAdapter(Inference, ModelRegistryHelper):
     def __init__(self, config: WatsonXConfig) -> None:
         ModelRegistryHelper.__init__(self, MODEL_ENTRIES)
 
-        print(f"Initializing watsonx InferenceAdapter({config.url})...")
+        logger.info(f"Initializing watsonx InferenceAdapter({config.url})...")
 
         self._config = config
+        self._openai_client: AsyncOpenAI | None = None
 
         self._project_id = self._config.project_id
 
```
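The comment block added above only documents the two response shapes watsonx produces; the extraction code itself is not part of this excerpt. Purely as an illustration of what such a helper could look like (function name and regexes are assumptions, not the adapter's actual implementation):

```python
# Illustrative helper in the spirit of the note above: pull a JSON object out of
# content wrapped in a ```json fence or a LaTeX \boxed{...} expression.
import json
import re


def extract_json_from_content(content: str) -> dict | None:
    """Best-effort extraction of a JSON object embedded in model output."""
    # Case 1: JSON inside a markdown code fence, e.g. '```json\n{...}\n```'
    fence = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", content, re.DOTALL)
    if fence:
        try:
            return json.loads(fence.group(1))
        except json.JSONDecodeError:
            pass
    # Case 2: JSON inside a boxed expression, e.g. '$\boxed{\{"name": "..."\}}$'
    start = content.find(r"\boxed{")
    if start != -1:
        inner = content[start + len(r"\boxed{") : content.rfind("}")]
        # Drop the backslash escaping around the braces before parsing.
        try:
            return json.loads(inner.replace(r"\{", "{").replace(r"\}", "}"))
        except json.JSONDecodeError:
            pass
    return None
```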
OpenAI completion / chat completion support checks in the integration tests:

```diff
@@ -58,6 +58,7 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
         # does not work with the specified model, gpt-5-mini. Please choose different model and try
         # again. You can learn more about which models can be used with each operation here:
         # https://go.microsoft.com/fwlink/?linkid=2197993.'}}"}
+        "remote::watsonx",  # return 404 when hitting the /openai/v1 endpoint
     ):
         pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")
 
@@ -110,6 +111,8 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, mode
         "remote::cerebras",
         "remote::databricks",
         "remote::runpod",
+        "remote::tgi",
+        "remote::watsonx",  # watsonx returns 404 when hitting the /openai/v1 endpoint
     ):
         pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI chat completions.")
 
```
tests/integration/inference/test_text_inference.py:

```diff
@@ -45,7 +45,7 @@ def skip_if_model_doesnt_support_json_schema_structured_output(client_with_model
     provider_id = models[model_id].provider_id
     providers = {p.provider_id: p for p in client_with_models.providers.list()}
     provider = providers[provider_id]
-    if provider.provider_type in ("remote::sambanova", "remote::azure"):
+    if provider.provider_type in ("remote::sambanova", "remote::azure", "remote::watsonx"):
         pytest.skip(
             f"Model {model_id} hosted by {provider.provider_type} doesn't support json_schema structured output"
         )
@@ -211,6 +211,7 @@ def test_text_completion_log_probs_streaming(client_with_models, text_model_id,
 )
 def test_text_completion_structured_output(client_with_models, text_model_id, test_case):
     skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
+    skip_if_model_doesnt_support_json_schema_structured_output(client_with_models, text_model_id)
 
     class AnswerFormat(BaseModel):
         name: str
```