feat: support passing "extra body" through to providers

# What does this PR do?
Allows passing through extra_body parameters to inference providers.


closes #2720

## Test Plan
CI, plus a new test added in this PR.
This commit is contained in:
Eric Huang 2025-10-10 14:36:51 -07:00
parent cb7fb0705b
commit aa34b11b50
16 changed files with 1670 additions and 92 deletions

View file

@ -246,7 +246,8 @@ class InferenceRouter(Inference):
provider = await self.routing_table.get_provider_impl(model_obj.identifier)
if params.stream:
response_stream = await provider.openai_chat_completion(params)
extra_body = dict(params.__pydantic_extra__ or {})
response_stream = await provider.openai_chat_completion(params, **extra_body)
# For streaming, the provider returns AsyncIterator[OpenAIChatCompletionChunk]
# We need to add metrics to each chunk and store the final completion
@ -319,7 +320,8 @@ class InferenceRouter(Inference):
async def _nonstream_openai_chat_completion(
self, provider: Inference, params: OpenAIChatCompletionRequest
) -> OpenAIChatCompletion:
response = await provider.openai_chat_completion(params)
extra_body = dict(params.__pydantic_extra__ or {})
response = await provider.openai_chat_completion(params, **extra_body)
for choice in response.choices:
# some providers return an empty list for no tool calls in non-streaming responses
# but the OpenAI API returns None. So, set tool_calls to None if it's empty