test

# What does this PR do? ## Test Plan # What does this PR do? ## Test Plan # What does this PR do? ## Test Plan Completes the refactoring started in previous commit by: 1. **Fix library client** (critical): Add logic to detect Pydantic model parameters and construct them properly from request bodies. The key fix is to NOT exclude any params when converting the body for Pydantic models - we need all fields to pass to the Pydantic constructor. Before: _convert_body excluded all params, leaving body empty for Pydantic construction After: Check for Pydantic params first, skip exclusion, construct model with full body 2. **Update remaining providers** to use new Pydantic-based signatures: - litellm_openai_mixin: Extract extra fields via __pydantic_extra__ - databricks: Use TYPE_CHECKING import for params type - llama_openai_compat: Use TYPE_CHECKING import for params type - sentence_transformers: Update method signatures to use params 3. **Update unit tests** to use new Pydantic signature: - test_openai_mixin.py: Use OpenAIChatCompletionRequestParams This fixes test failures where the library client was trying to construct Pydantic models with empty dictionaries. The previous fix had a bug: it called _convert_body() which only keeps fields that match function parameter names. For Pydantic methods with signature: openai_chat_completion(params: OpenAIChatCompletionRequestParams) The signature only has 'params', but the body has 'model', 'messages', etc. So _convert_body() returned an empty dict. Fix: Skip _convert_body() entirely for Pydantic params. Use the raw body directly to construct the Pydantic model (after stripping NOT_GIVENs). This properly fixes the ValidationError where required fields were missing. The streaming code path (_call_streaming) had the same issue as non-streaming: it called _convert_body() which returned empty dict for Pydantic params. Applied the same fix as commit 7476c0ae: - Detect Pydantic model parameters before body conversion - Skip _convert_body() for Pydantic params - Construct Pydantic model directly from raw body (after stripping NOT_GIVENs) This fixes streaming endpoints like openai_chat_completion with stream=True. The streaming code path (_call_streaming) had the same issue as non-streaming: it called _convert_body() which returned empty dict for Pydantic params. Applied the same fix as commit 7476c0ae: - Detect Pydantic model parameters before body conversion - Skip _convert_body() for Pydantic params - Construct Pydantic model directly from raw body (after stripping NOT_GIVENs) This fixes streaming endpoints like openai_chat_completion with stream=True.
2025-10-10 21:34:36 +00:00 · 2025-10-09 13:53:31 -07:00 · 2025-10-09 13:53:31 -07:00 · a93130e323
commit a93130e323
parent 26fd5dbd34
295 changed files with 51966 additions and 3051 deletions
--- a/llama_stack/providers/remote/inference/passthrough/passthrough.py
+++ b/llama_stack/providers/remote/inference/passthrough/passthrough.py
@ -13,15 +13,14 @@ from llama_stack.apis.inference import (
    Inference,
    OpenAIChatCompletion,
    OpenAIChatCompletionChunk,
+    OpenAIChatCompletionRequestParams,
    OpenAICompletion,
+    OpenAICompletionRequestParams,
    OpenAIEmbeddingsResponse,
-    OpenAIMessageParam,
-    OpenAIResponseFormatParam,
 )
 from llama_stack.apis.models import Model
 from llama_stack.core.library_client import convert_pydantic_to_json_value
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
-from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params

 from .config import PassthroughImplConfig

@ -80,110 +79,33 @@ class PassthroughInferenceAdapter(Inference):

    async def openai_completion(
        self,
-        model: str,
-        prompt: str | list[str] | list[int] | list[list[int]],
-        best_of: int | None = None,
-        echo: bool | None = None,
-        frequency_penalty: float | None = None,
-        logit_bias: dict[str, float] | None = None,
-        logprobs: bool | None = None,
-        max_tokens: int | None = None,
-        n: int | None = None,
-        presence_penalty: float | None = None,
-        seed: int | None = None,
-        stop: str | list[str] | None = None,
-        stream: bool | None = None,
-        stream_options: dict[str, Any] | None = None,
-        temperature: float | None = None,
-        top_p: float | None = None,
-        user: str | None = None,
-        guided_choice: list[str] | None = None,
-        prompt_logprobs: int | None = None,
-        suffix: str | None = None,
+        params: OpenAICompletionRequestParams,
    ) -> OpenAICompletion:
        client = self._get_client()
-        model_obj = await self.model_store.get_model(model)
+        model_obj = await self.model_store.get_model(params.model)

-        params = await prepare_openai_completion_params(
-            model=model_obj.provider_resource_id,
-            prompt=prompt,
-            best_of=best_of,
-            echo=echo,
-            frequency_penalty=frequency_penalty,
-            logit_bias=logit_bias,
-            logprobs=logprobs,
-            max_tokens=max_tokens,
-            n=n,
-            presence_penalty=presence_penalty,
-            seed=seed,
-            stop=stop,
-            stream=stream,
-            stream_options=stream_options,
-            temperature=temperature,
-            top_p=top_p,
-            user=user,
-            guided_choice=guided_choice,
-            prompt_logprobs=prompt_logprobs,
-        )
+        # Update model with provider resource ID
+        params.model = model_obj.provider_resource_id

-        return await client.inference.openai_completion(**params)
+        # Convert Pydantic model to dict, including extra fields
+        request_params = params.model_dump(exclude_none=True)
+
+        return await client.inference.openai_completion(**request_params)

    async def openai_chat_completion(
        self,
-        model: str,
-        messages: list[OpenAIMessageParam],
-        frequency_penalty: float | None = None,
-        function_call: str | dict[str, Any] | None = None,
-        functions: list[dict[str, Any]] | None = None,
-        logit_bias: dict[str, float] | None = None,
-        logprobs: bool | None = None,
-        max_completion_tokens: int | None = None,
-        max_tokens: int | None = None,
-        n: int | None = None,
-        parallel_tool_calls: bool | None = None,
-        presence_penalty: float | None = None,
-        response_format: OpenAIResponseFormatParam | None = None,
-        seed: int | None = None,
-        stop: str | list[str] | None = None,
-        stream: bool | None = None,
-        stream_options: dict[str, Any] | None = None,
-        temperature: float | None = None,
-        tool_choice: str | dict[str, Any] | None = None,
-        tools: list[dict[str, Any]] | None = None,
-        top_logprobs: int | None = None,
-        top_p: float | None = None,
-        user: str | None = None,
+        params: OpenAIChatCompletionRequestParams,
    ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
        client = self._get_client()
-        model_obj = await self.model_store.get_model(model)
+        model_obj = await self.model_store.get_model(params.model)

-        params = await prepare_openai_completion_params(
-            model=model_obj.provider_resource_id,
-            messages=messages,
-            frequency_penalty=frequency_penalty,
-            function_call=function_call,
-            functions=functions,
-            logit_bias=logit_bias,
-            logprobs=logprobs,
-            max_completion_tokens=max_completion_tokens,
-            max_tokens=max_tokens,
-            n=n,
-            parallel_tool_calls=parallel_tool_calls,
-            presence_penalty=presence_penalty,
-            response_format=response_format,
-            seed=seed,
-            stop=stop,
-            stream=stream,
-            stream_options=stream_options,
-            temperature=temperature,
-            tool_choice=tool_choice,
-            tools=tools,
-            top_logprobs=top_logprobs,
-            top_p=top_p,
-            user=user,
-        )
+        # Update model with provider resource ID
+        params.model = model_obj.provider_resource_id

-        return await client.inference.openai_chat_completion(**params)
+        # Convert Pydantic model to dict, including extra fields
+        request_params = params.model_dump(exclude_none=True)
+
+        return await client.inference.openai_chat_completion(**request_params)

    def cast_value_to_json_dict(self, request_params: dict[str, Any]) -> dict[str, Any]:
        json_params = {}