fix(mypy): resolve OpenAI SDK and provider type issues (#3936)

## Summary

- Fix OpenAI SDK NotGiven/Omit type mismatches in embeddings calls
- Fix incorrect OpenAIChatCompletionChunk import in vllm provider
- Refactor to avoid type:ignore comments by using conditional kwargs

## Changes

**openai_mixin.py (9 errors fixed):**

- Build kwargs conditionally for embeddings.create() to avoid NotGiven/Omit mismatch
- Only include parameters when they have actual values (not None)

**gemini.py (9 errors fixed):**

- Apply same conditional kwargs pattern
- Add missing Any import

**vllm.py (2 errors fixed):**

- Use correct OpenAIChatCompletionChunk from llama_stack.apis.inference
- Remove incorrect alias from openai package

## Technical Notes

The OpenAI SDK has a type system quirk where `NOT_GIVEN` has type `NotGiven` but parameter signatures expect `Omit`. By only passing parameters with actual values, we avoid this mismatch entirely without needing `# type: ignore` comments.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-12-03 09:53:45 +00:00 · 2025-10-28 10:54:29 -07:00 · 2025-10-28 10:54:29 -07:00 · 1d385b5b75
commit 1d385b5b75
parent d009dc29f7
7 changed files with 60 additions and 41 deletions
--- a/src/llama_stack/providers/remote/inference/gemini/gemini.py
+++ b/src/llama_stack/providers/remote/inference/gemini/gemini.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from openai import NOT_GIVEN
+from typing import Any

 from llama_stack.apis.inference import (
    OpenAIEmbeddingData,
@@ -37,21 +37,20 @@ class GeminiInferenceAdapter(OpenAIMixin):
        Override embeddings method to handle Gemini's missing usage statistics.
        Gemini's embedding API doesn't return usage information, so we provide default values.
        """
-        # Prepare request parameters
-        request_params = {
+        # Build request params conditionally to avoid NotGiven/Omit type mismatch
+        request_params: dict[str, Any] = {
            "model": await self._get_provider_model_id(params.model),
            "input": params.input,
-            "encoding_format": params.encoding_format if params.encoding_format is not None else NOT_GIVEN,
-            "dimensions": params.dimensions if params.dimensions is not None else NOT_GIVEN,
-            "user": params.user if params.user is not None else NOT_GIVEN,
        }
+        if params.encoding_format is not None:
+            request_params["encoding_format"] = params.encoding_format
+        if params.dimensions is not None:
+            request_params["dimensions"] = params.dimensions
+        if params.user is not None:
+            request_params["user"] = params.user
+        if params.model_extra:
+            request_params["extra_body"] = params.model_extra

-        # Add extra_body if present
-        extra_body = params.model_extra
-        if extra_body:
-            request_params["extra_body"] = extra_body
-
-        # Call OpenAI embeddings API with properly typed parameters
        response = await self.client.embeddings.create(**request_params)

        data = []
--- a/src/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/src/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -7,13 +7,11 @@ from collections.abc import AsyncIterator
 from urllib.parse import urljoin

 import httpx
-from openai.types.chat.chat_completion_chunk import (
-    ChatCompletionChunk as OpenAIChatCompletionChunk,
-)
 from pydantic import ConfigDict

 from llama_stack.apis.inference import (
    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
    OpenAIChatCompletionRequestWithExtraBody,
    ToolChoice,
 )