feat: support passing "extra body" through to providers

# What does this PR do? Allows passing `extra_body` parameters through to inference providers. Closes #2720. ## Test Plan: CI, plus a newly added test.
2025-10-12 21:58:38 +00:00 · 2025-10-10 15:05:22 -07:00 · 2025-10-10 15:05:22 -07:00 · dbaaeea255
commit dbaaeea255
parent cb7fb0705b
21 changed files with 1799 additions and 96 deletions
--- a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
+++ b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
@@ -3,6 +3,8 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+from typing import Any
+
 from llama_stack.apis.inference.inference import OpenAICompletion, OpenAICompletionRequest, OpenAIEmbeddingsResponse
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
@@ -30,6 +32,7 @@ class LlamaCompatInferenceAdapter(OpenAIMixin):
    async def openai_completion(
        self,
        params: OpenAICompletionRequest,
+        **kwargs: Any,
    ) -> OpenAICompletion:
        raise NotImplementedError()

@@ -40,5 +43,6 @@ class LlamaCompatInferenceAdapter(OpenAIMixin):
        encoding_format: str | None = "float",
        dimensions: int | None = None,
        user: str | None = None,
+        **kwargs: Any,
    ) -> OpenAIEmbeddingsResponse:
        raise NotImplementedError()