fix(mypy): resolve OpenAI SDK and provider type issues (#3936)

## Summary
- Fix OpenAI SDK NotGiven/Omit type mismatches in embeddings calls
- Fix incorrect OpenAIChatCompletionChunk import in vllm provider
- Refactor to avoid type:ignore comments by using conditional kwargs

## Changes
**openai_mixin.py (9 errors fixed):**
- Build kwargs conditionally for embeddings.create() to avoid
NotGiven/Omit mismatch
- Only include parameters when they have actual values (not None)

**gemini.py (9 errors fixed):**
- Apply same conditional kwargs pattern
- Add missing Any import

**vllm.py (2 errors fixed):**
- Use correct OpenAIChatCompletionChunk from llama_stack.apis.inference
- Remove incorrect alias from openai package

## Technical Notes
The OpenAI SDK has a type system quirk where `NOT_GIVEN` has type
`NotGiven` but parameter signatures expect `Omit`. By only passing
parameters with actual values, we avoid this mismatch entirely without
needing `# type: ignore` comments.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Ashwin Bharambe 2025-10-28 10:54:29 -07:00 committed by GitHub
parent d009dc29f7
commit 1d385b5b75
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 60 additions and 41 deletions

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from openai import NOT_GIVEN
from typing import Any
from llama_stack.apis.inference import (
OpenAIEmbeddingData,
@ -37,21 +37,20 @@ class GeminiInferenceAdapter(OpenAIMixin):
Override embeddings method to handle Gemini's missing usage statistics.
Gemini's embedding API doesn't return usage information, so we provide default values.
"""
# Prepare request parameters
request_params = {
# Build request params conditionally to avoid NotGiven/Omit type mismatch
request_params: dict[str, Any] = {
"model": await self._get_provider_model_id(params.model),
"input": params.input,
"encoding_format": params.encoding_format if params.encoding_format is not None else NOT_GIVEN,
"dimensions": params.dimensions if params.dimensions is not None else NOT_GIVEN,
"user": params.user if params.user is not None else NOT_GIVEN,
}
if params.encoding_format is not None:
request_params["encoding_format"] = params.encoding_format
if params.dimensions is not None:
request_params["dimensions"] = params.dimensions
if params.user is not None:
request_params["user"] = params.user
if params.model_extra:
request_params["extra_body"] = params.model_extra
# Add extra_body if present
extra_body = params.model_extra
if extra_body:
request_params["extra_body"] = extra_body
# Call OpenAI embeddings API with properly typed parameters
response = await self.client.embeddings.create(**request_params)
data = []

View file

@ -7,13 +7,11 @@ from collections.abc import AsyncIterator
from urllib.parse import urljoin
import httpx
from openai.types.chat.chat_completion_chunk import (
ChatCompletionChunk as OpenAIChatCompletionChunk,
)
from pydantic import ConfigDict
from llama_stack.apis.inference import (
OpenAIChatCompletion,
OpenAIChatCompletionChunk,
OpenAIChatCompletionRequestWithExtraBody,
ToolChoice,
)