featu: support passing "extra body" throught to providers

# What does this PR do? Allows passing through extra_body parameters to inference providers. closes #2720 ## Test Plan CI and added new test
2025-10-12 13:57:57 +00:00 · 2025-10-10 16:10:35 -07:00 · 2025-10-10 16:10:35 -07:00 · 579aa96b2c
commit 579aa96b2c
parent 80d58ab519
42 changed files with 3147 additions and 202 deletions
--- a/llama_stack/providers/inline/batches/reference/batches.py
+++ b/llama_stack/providers/inline/batches/reference/batches.py
@ -22,8 +22,8 @@ from llama_stack.apis.files import Files, OpenAIFilePurpose
 from llama_stack.apis.inference import (
    Inference,
    OpenAIAssistantMessageParam,
-    OpenAIChatCompletionRequest,
-    OpenAICompletionRequest,
+    OpenAIChatCompletionRequestWithExtraBody,
+    OpenAICompletionRequestWithExtraBody,
    OpenAIDeveloperMessageParam,
    OpenAIMessageParam,
    OpenAISystemMessageParam,
@ -608,7 +608,7 @@ class ReferenceBatchesImpl(Batches):
            # TODO(SECURITY): review body for security issues
            if request.url == "/v1/chat/completions":
                request.body["messages"] = [convert_to_openai_message_param(msg) for msg in request.body["messages"]]
-                chat_params = OpenAIChatCompletionRequest(**request.body)
+                chat_params = OpenAIChatCompletionRequestWithExtraBody(**request.body)
                chat_response = await self.inference_api.openai_chat_completion(chat_params)

                # this is for mypy, we don't allow streaming so we'll get the right type
@ -623,7 +623,7 @@ class ReferenceBatchesImpl(Batches):
                    },
                }
            elif request.url == "/v1/completions":
-                completion_params = OpenAICompletionRequest(**request.body)
+                completion_params = OpenAICompletionRequestWithExtraBody(**request.body)
                completion_response = await self.inference_api.openai_completion(completion_params)

                # this is for mypy, we don't allow streaming so we'll get the right type