fix: Fixed WatsonX remote inference provider (#3801)

# What does this PR do? This PR fixes issues with the WatsonX provider so it works correctly with LiteLLM. The main problem was that WatsonX requests failed because the provider data validator didn’t properly handle the API key and project ID. This was fixed by updating the WatsonXProviderDataValidator and ensuring the provider data is loaded correctly. The openai_chat_completion method was also updated to match the behavior of other providers while adding WatsonX-specific fields like project_id. It still calls await super().openai_chat_completion.__func__(self, params) to keep the existing setup and tracing logic. After these changes, WatsonX requests now run correctly. ## Test Plan The changes were tested by running chat completion requests and confirming that credentials and project parameters are passed correctly. I have tested with my WatsonX credentials, by using the cli with `uv run llama-stack-client inference chat-completion --session` --------- Signed-off-by: Sébastien Han <seb@redhat.com> Co-authored-by: Sébastien Han <seb@redhat.com>
2025-12-03 09:53:45 +00:00 · 2025-10-14 14:52:32 +02:00 · 2025-10-14 14:52:32 +02:00 · 0dbf79c328
commit 0dbf79c328
parent 1136daf310
5 changed files with 254 additions and 26 deletions
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@ -58,7 +58,6 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
        #  does not work with the specified model, gpt-5-mini. Please choose different model and try
        #  again. You can learn more about which models can be used with each operation here:
        #  https://go.microsoft.com/fwlink/?linkid=2197993.'}}"}
-        "remote::watsonx",  # return 404 when hitting the /openai/v1 endpoint
        "remote::llama-openai-compat",
    ):
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")
@ -68,6 +67,7 @@ def skip_if_doesnt_support_completions_logprobs(client_with_models, model_id):
    provider_type = provider_from_model(client_with_models, model_id).provider_type
    if provider_type in (
        "remote::ollama",  # logprobs is ignored
+        "remote::watsonx",
    ):
        pytest.skip(f"Model {model_id} hosted by {provider_type} doesn't support /v1/completions logprobs.")

@ -110,6 +110,7 @@ def skip_if_doesnt_support_n(client_with_models, model_id):
        # Error code 400 - {'message': '"n" > 1 is not currently supported', 'type': 'invalid_request_error', 'param': 'n', 'code': 'wrong_api_format'}
        "remote::cerebras",
        "remote::databricks",  # Bad request: parameter "n" must be equal to 1 for streaming mode
+        "remote::watsonx",
    ):
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support n param.")

@ -124,7 +125,6 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, mode
        "remote::databricks",
        "remote::cerebras",
        "remote::runpod",
-        "remote::watsonx",  # watsonx returns 404 when hitting the /openai/v1 endpoint
    ):
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI chat completions.")

@ -508,6 +508,12 @@ def test_openai_chat_completion_non_streaming_with_file(openai_client, client_wi
    assert "hello world" in normalized_content


+def skip_if_doesnt_support_completions_stop_sequence(client_with_models, model_id):
+    provider_type = provider_from_model(client_with_models, model_id).provider_type
+    if provider_type in ("remote::watsonx",):  # openai.BadRequestError: Error code: 400
+        pytest.skip(f"Model {model_id} hosted by {provider_type} doesn't support /v1/completions stop sequence.")
+
+
@pytest.mark.parametrize(
    "test_case",
    [
@ -516,6 +522,7 @@ def test_openai_chat_completion_non_streaming_with_file(openai_client, client_wi
 )
 def test_openai_completion_stop_sequence(client_with_models, openai_client, text_model_id, test_case):
    skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
+    skip_if_doesnt_support_completions_stop_sequence(client_with_models, text_model_id)

    tc = TestCase(test_case)

--- a/tests/integration/inference/test_openai_embeddings.py
+++ b/tests/integration/inference/test_openai_embeddings.py
@ -50,11 +50,15 @@ def skip_if_model_doesnt_support_encoding_format_base64(client, model_id):

 def skip_if_model_doesnt_support_variable_dimensions(client_with_models, model_id):
    provider = provider_from_model(client_with_models, model_id)
-    if provider.provider_type in (
-        "remote::together",  # returns 400
-        "inline::sentence-transformers",
-        # Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Bad request: json: unknown field "dimensions"\n'}
-        "remote::databricks",
+    if (
+        provider.provider_type
+        in (
+            "remote::together",  # returns 400
+            "inline::sentence-transformers",
+            # Error code: 400 - {'error_code': 'BAD_REQUEST', 'message': 'Bad request: json: unknown field "dimensions"\n'}
+            "remote::databricks",
+            "remote::watsonx",  # openai.BadRequestError: Error code: 400 - {'detail': "litellm.UnsupportedParamsError: watsonx does not support parameters: {'dimensions': 384}
+        )
    ):
        pytest.skip(
            f"Model {model_id} hosted by {provider.provider_type} does not support variable output embedding dimensions."