feat: support passing "extra body" through to providers

# What does this PR do? Allows passing `extra_body` parameters through to inference providers. Closes #2720 ## Test Plan CI, plus a newly added test.
2025-10-12 13:57:57 +00:00 · 2025-10-10 15:46:56 -07:00 · 2025-10-10 15:46:56 -07:00 · 10c7e67fca
commit 10c7e67fca
parent 80d58ab519
35 changed files with 1893 additions and 200 deletions
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@ -14,7 +14,7 @@ from pydantic import ConfigDict

 from llama_stack.apis.inference import (
    OpenAIChatCompletion,
-    OpenAIChatCompletionRequest,
+    OpenAIChatCompletionRequestWithExtraBody,
    ToolChoice,
 )
 from llama_stack.log import get_logger
@ -93,7 +93,7 @@ class VLLMInferenceAdapter(OpenAIMixin):

    async def openai_chat_completion(
        self,
-        params: OpenAIChatCompletionRequest,
+        params: OpenAIChatCompletionRequestWithExtraBody,
    ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
        params = params.model_copy()