mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-10 21:34:36 +00:00
test
# What does this PR do?

## Test Plan

# What does this PR do?

## Test Plan

# What does this PR do?

## Test Plan

Completes the refactoring started in the previous commit by:

1. **Fix library client** (critical): Add logic to detect Pydantic model parameters and construct them properly from request bodies. The key fix is to NOT exclude any params when converting the body for Pydantic models - we need all fields to pass to the Pydantic constructor.

   - Before: `_convert_body` excluded all params, leaving the body empty for Pydantic construction.
   - After: Check for Pydantic params first, skip exclusion, construct the model with the full body.

2. **Update remaining providers** to use the new Pydantic-based signatures:

   - litellm_openai_mixin: Extract extra fields via `__pydantic_extra__`
   - databricks: Use TYPE_CHECKING import for params type
   - llama_openai_compat: Use TYPE_CHECKING import for params type
   - sentence_transformers: Update method signatures to use params

3. **Update unit tests** to use the new Pydantic signature:

   - test_openai_mixin.py: Use OpenAIChatCompletionRequestParams

This fixes test failures where the library client was trying to construct Pydantic models with empty dictionaries.

The previous fix had a bug: it called `_convert_body()`, which only keeps fields that match function parameter names. For Pydantic methods with the signature:

    openai_chat_completion(params: OpenAIChatCompletionRequestParams)

The signature only has 'params', but the body has 'model', 'messages', etc. So `_convert_body()` returned an empty dict.

Fix: Skip `_convert_body()` entirely for Pydantic params. Use the raw body directly to construct the Pydantic model (after stripping NOT_GIVENs).

This properly fixes the ValidationError where required fields were missing.

The streaming code path (`_call_streaming`) had the same issue as non-streaming: it called `_convert_body()`, which returned an empty dict for Pydantic params.

Applied the same fix as commit 7476c0ae:

- Detect Pydantic model parameters before body conversion
- Skip `_convert_body()` for Pydantic params
- Construct Pydantic model directly from raw body (after stripping NOT_GIVENs)

This fixes streaming endpoints like openai_chat_completion with stream=True.

The streaming code path (`_call_streaming`) had the same issue as non-streaming: it called `_convert_body()`, which returned an empty dict for Pydantic params.

Applied the same fix as commit 7476c0ae:

- Detect Pydantic model parameters before body conversion
- Skip `_convert_body()` for Pydantic params
- Construct Pydantic model directly from raw body (after stripping NOT_GIVENs)

This fixes streaming endpoints like openai_chat_completion with stream=True.
This commit is contained in:
parent
26fd5dbd34
commit
a93130e323
295 changed files with 51966 additions and 3051 deletions
|
@ -13,15 +13,14 @@ from llama_stack.apis.inference import (
|
|||
Inference,
|
||||
OpenAIChatCompletion,
|
||||
OpenAIChatCompletionChunk,
|
||||
OpenAIChatCompletionRequestParams,
|
||||
OpenAICompletion,
|
||||
OpenAICompletionRequestParams,
|
||||
OpenAIEmbeddingsResponse,
|
||||
OpenAIMessageParam,
|
||||
OpenAIResponseFormatParam,
|
||||
)
|
||||
from llama_stack.apis.models import Model
|
||||
from llama_stack.core.library_client import convert_pydantic_to_json_value
|
||||
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
|
||||
from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
|
||||
|
||||
from .config import PassthroughImplConfig
|
||||
|
||||
|
@ -80,110 +79,33 @@ class PassthroughInferenceAdapter(Inference):
|
|||
|
||||
async def openai_completion(
|
||||
self,
|
||||
model: str,
|
||||
prompt: str | list[str] | list[int] | list[list[int]],
|
||||
best_of: int | None = None,
|
||||
echo: bool | None = None,
|
||||
frequency_penalty: float | None = None,
|
||||
logit_bias: dict[str, float] | None = None,
|
||||
logprobs: bool | None = None,
|
||||
max_tokens: int | None = None,
|
||||
n: int | None = None,
|
||||
presence_penalty: float | None = None,
|
||||
seed: int | None = None,
|
||||
stop: str | list[str] | None = None,
|
||||
stream: bool | None = None,
|
||||
stream_options: dict[str, Any] | None = None,
|
||||
temperature: float | None = None,
|
||||
top_p: float | None = None,
|
||||
user: str | None = None,
|
||||
guided_choice: list[str] | None = None,
|
||||
prompt_logprobs: int | None = None,
|
||||
suffix: str | None = None,
|
||||
params: OpenAICompletionRequestParams,
|
||||
) -> OpenAICompletion:
|
||||
client = self._get_client()
|
||||
model_obj = await self.model_store.get_model(model)
|
||||
model_obj = await self.model_store.get_model(params.model)
|
||||
|
||||
params = await prepare_openai_completion_params(
|
||||
model=model_obj.provider_resource_id,
|
||||
prompt=prompt,
|
||||
best_of=best_of,
|
||||
echo=echo,
|
||||
frequency_penalty=frequency_penalty,
|
||||
logit_bias=logit_bias,
|
||||
logprobs=logprobs,
|
||||
max_tokens=max_tokens,
|
||||
n=n,
|
||||
presence_penalty=presence_penalty,
|
||||
seed=seed,
|
||||
stop=stop,
|
||||
stream=stream,
|
||||
stream_options=stream_options,
|
||||
temperature=temperature,
|
||||
top_p=top_p,
|
||||
user=user,
|
||||
guided_choice=guided_choice,
|
||||
prompt_logprobs=prompt_logprobs,
|
||||
)
|
||||
# Update model with provider resource ID
|
||||
params.model = model_obj.provider_resource_id
|
||||
|
||||
return await client.inference.openai_completion(**params)
|
||||
# Convert Pydantic model to dict, including extra fields
|
||||
request_params = params.model_dump(exclude_none=True)
|
||||
|
||||
return await client.inference.openai_completion(**request_params)
|
||||
|
||||
async def openai_chat_completion(
|
||||
self,
|
||||
model: str,
|
||||
messages: list[OpenAIMessageParam],
|
||||
frequency_penalty: float | None = None,
|
||||
function_call: str | dict[str, Any] | None = None,
|
||||
functions: list[dict[str, Any]] | None = None,
|
||||
logit_bias: dict[str, float] | None = None,
|
||||
logprobs: bool | None = None,
|
||||
max_completion_tokens: int | None = None,
|
||||
max_tokens: int | None = None,
|
||||
n: int | None = None,
|
||||
parallel_tool_calls: bool | None = None,
|
||||
presence_penalty: float | None = None,
|
||||
response_format: OpenAIResponseFormatParam | None = None,
|
||||
seed: int | None = None,
|
||||
stop: str | list[str] | None = None,
|
||||
stream: bool | None = None,
|
||||
stream_options: dict[str, Any] | None = None,
|
||||
temperature: float | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
top_logprobs: int | None = None,
|
||||
top_p: float | None = None,
|
||||
user: str | None = None,
|
||||
params: OpenAIChatCompletionRequestParams,
|
||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||
client = self._get_client()
|
||||
model_obj = await self.model_store.get_model(model)
|
||||
model_obj = await self.model_store.get_model(params.model)
|
||||
|
||||
params = await prepare_openai_completion_params(
|
||||
model=model_obj.provider_resource_id,
|
||||
messages=messages,
|
||||
frequency_penalty=frequency_penalty,
|
||||
function_call=function_call,
|
||||
functions=functions,
|
||||
logit_bias=logit_bias,
|
||||
logprobs=logprobs,
|
||||
max_completion_tokens=max_completion_tokens,
|
||||
max_tokens=max_tokens,
|
||||
n=n,
|
||||
parallel_tool_calls=parallel_tool_calls,
|
||||
presence_penalty=presence_penalty,
|
||||
response_format=response_format,
|
||||
seed=seed,
|
||||
stop=stop,
|
||||
stream=stream,
|
||||
stream_options=stream_options,
|
||||
temperature=temperature,
|
||||
tool_choice=tool_choice,
|
||||
tools=tools,
|
||||
top_logprobs=top_logprobs,
|
||||
top_p=top_p,
|
||||
user=user,
|
||||
)
|
||||
# Update model with provider resource ID
|
||||
params.model = model_obj.provider_resource_id
|
||||
|
||||
return await client.inference.openai_chat_completion(**params)
|
||||
# Convert Pydantic model to dict, including extra fields
|
||||
request_params = params.model_dump(exclude_none=True)
|
||||
|
||||
return await client.inference.openai_chat_completion(**request_params)
|
||||
|
||||
def cast_value_to_json_dict(self, request_params: dict[str, Any]) -> dict[str, Any]:
|
||||
json_params = {}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue