Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-11 21:48:36 +00:00)
feat: support passing "extra body" through to providers
# What does this PR do?

Allows passing through extra_body parameters to inference providers.

Closes #2720

## Test Plan

CI and added new test
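For context, a minimal sketch of how a caller might exercise this from the client side, assuming the OpenAI Python client pointed at a Llama Stack server's OpenAI-compatible endpoint (the base URL, model ID, and `guided_choice` value are illustrative assumptions, not taken from this PR):

```python
# Sketch: passing a provider-specific parameter via extra_body through an
# OpenAI-compatible client. base_url, api_key, and model are assumptions made
# for illustration; guided_choice is a vLLM-specific parameter that the stack
# can now forward to the provider instead of dropping it.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

response = client.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct",
    prompt="Is Paris in France? Answer yes or no:",
    # Fields in extra_body are merged into the JSON request body.
    extra_body={"guided_choice": ["yes", "no"]},
)
print(response.choices[0].text)
```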
Parent: cb7fb0705b
Commit: d7b57a8dd2

4 changed files with 107 additions and 20 deletions
```diff
@@ -1058,8 +1058,6 @@ class OpenAICompletionRequest(BaseModel):
     :param top_p: (Optional) The top p to use.
     :param user: (Optional) The user to use.
     :param suffix: (Optional) The suffix that should be appended to the completion.
     :param guided_choice: (Optional) vLLM-specific parameter for guided generation with a list of choices.
     :param prompt_logprobs: (Optional) vLLM-specific parameter for number of log probabilities to return for prompt tokens.
     """

     model_config = ConfigDict(extra="allow")

@@ -1082,12 +1080,6 @@ class OpenAICompletionRequest(BaseModel):
     temperature: float | None = None
     top_p: float | None = None
     user: str | None = None

     # vLLM-specific parameters (documented here but also allowed via extra fields)
     guided_choice: list[str] | None = None
     prompt_logprobs: int | None = None

     # for fill-in-the-middle type completion
     suffix: str | None = None
```
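The `model_config = ConfigDict(extra="allow")` line above is what lets fields that are not declared on the request model survive validation so they can be forwarded. A minimal, self-contained sketch of that Pydantic behavior (the class and field names are illustrative, not the actual Llama Stack model):

```python
# Sketch of how extra="allow" keeps undeclared fields on a Pydantic model so a
# provider adapter can later merge them into the downstream request. Names are
# illustrative, not the real Llama Stack classes.
from pydantic import BaseModel, ConfigDict


class CompletionRequest(BaseModel):
    model_config = ConfigDict(extra="allow")

    model: str
    prompt: str
    temperature: float | None = None


req = CompletionRequest(
    model="example-model",
    prompt="hello",
    guided_choice=["yes", "no"],  # undeclared field, kept because extra="allow"
)

# Pydantic v2 exposes undeclared fields via model_extra.
print(req.model_extra)  # {'guided_choice': ['yes', 'no']}
```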