diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py index fd65fa10d..c23794eb4 100644 --- a/llama_stack/providers/inline/inference/meta_reference/inference.py +++ b/llama_stack/providers/inline/inference/meta_reference/inference.py @@ -173,5 +173,6 @@ class MetaReferenceInferenceImpl( top_logprobs: int | None = None, top_p: float | None = None, user: str | None = None, + **kwargs: Any, ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: raise NotImplementedError("OpenAI chat completion not supported by meta-reference inference provider") diff --git a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py index b984d97bf..7aa880de3 100644 --- a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +++ b/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py @@ -124,5 +124,6 @@ class SentenceTransformersInferenceImpl( top_logprobs: int | None = None, top_p: float | None = None, user: str | None = None, + **kwargs: Any, ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: raise NotImplementedError("OpenAI chat completion not supported by sentence transformers provider") diff --git a/llama_stack/providers/remote/inference/bedrock/bedrock.py b/llama_stack/providers/remote/inference/bedrock/bedrock.py index 9c8a74b47..ee354aaf3 100644 --- a/llama_stack/providers/remote/inference/bedrock/bedrock.py +++ b/llama_stack/providers/remote/inference/bedrock/bedrock.py @@ -186,5 +186,6 @@ class BedrockInferenceAdapter( top_logprobs: int | None = None, top_p: float | None = None, user: str | None = None, + **kwargs: Any, ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: raise NotImplementedError("OpenAI chat completion not supported by the Bedrock provider") diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index 6bef97dd5..68373ada9 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -299,6 +299,7 @@ class LiteLLMOpenAIMixin( top_logprobs: int | None = None, top_p: float | None = None, user: str | None = None, + **kwargs: Any, ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: # Add usage tracking for streaming when telemetry is active from llama_stack.providers.utils.telemetry.tracing import get_current_span @@ -335,6 +336,7 @@ class LiteLLMOpenAIMixin( user=user, api_key=self.get_api_key(), api_base=self.api_base, + **kwargs, ) return await litellm.acompletion(**params)