Further fixes according to investigation and PR comments

This commit is contained in:
r-bit-rry 2025-11-30 17:31:07 +02:00
parent 9b3c041af0
commit c3c9edf981
14 changed files with 212 additions and 30 deletions

View file

@@ -15,6 +15,7 @@ from llama_stack.log import get_logger
from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
from llama_stack.providers.utils.inference.stream_utils import wrap_async_stream
from llama_stack_api import (
Model,
ModelType,
@@ -178,7 +179,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
async def openai_completion(
self,
params: OpenAICompletionRequestWithExtraBody,
) -> OpenAICompletion:
) -> OpenAICompletion | AsyncIterator[OpenAICompletion]:
"""
Override parent method to add watsonx-specific parameters.
"""
@@ -211,7 +212,12 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
timeout=self.config.timeout,
project_id=self.config.project_id,
)
return await litellm.atext_completion(**request_params)
result = await litellm.atext_completion(**request_params)
if params.stream:
return wrap_async_stream(result) # type: ignore[arg-type] # LiteLLM streaming types
return result # type: ignore[return-value] # external lib lacks type stubs
async def openai_embeddings(
self,