Merge f229c433fe into sapling-pr-archive-ehhuang

commit 3e9dd56af8
Author: ehhuang (committed by GitHub)
Date: 2025-10-08 13:29:39 -07:00
4 changed files with 5 additions and 0 deletions


@@ -173,5 +173,6 @@ class MetaReferenceInferenceImpl(
         top_logprobs: int | None = None,
         top_p: float | None = None,
         user: str | None = None,
+        **kwargs: Any,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
         raise NotImplementedError("OpenAI chat completion not supported by meta-reference inference provider")
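
The three stub providers in this commit change for the same reason: even a provider that only raises NotImplementedError must keep its openai_chat_completion signature compatible with the inference protocol, otherwise an unexpected keyword argument from a caller would raise TypeError before the intended NotImplementedError. A minimal sketch of the pattern, assuming a simplified parameter list; the class name is hypothetical and only the **kwargs handling mirrors the diff:

    from collections.abc import AsyncIterator
    from typing import Any

    class StubInferenceImpl:  # hypothetical provider, not from the diff
        async def openai_chat_completion(
            self,
            model: str,
            messages: list[dict[str, Any]],
            user: str | None = None,
            **kwargs: Any,  # accept extra OpenAI-compatible params so the call reaches the raise
        ) -> "OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]":
            raise NotImplementedError("OpenAI chat completion not supported by this provider")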


@@ -124,5 +124,6 @@ class SentenceTransformersInferenceImpl(
         top_logprobs: int | None = None,
         top_p: float | None = None,
         user: str | None = None,
+        **kwargs: Any,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
         raise NotImplementedError("OpenAI chat completion not supported by sentence transformers provider")


@@ -186,5 +186,6 @@ class BedrockInferenceAdapter(
         top_logprobs: int | None = None,
         top_p: float | None = None,
         user: str | None = None,
+        **kwargs: Any,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
         raise NotImplementedError("OpenAI chat completion not supported by the Bedrock provider")


@@ -299,6 +299,7 @@ class LiteLLMOpenAIMixin(
         top_logprobs: int | None = None,
         top_p: float | None = None,
         user: str | None = None,
+        **kwargs: Any,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
         # Add usage tracking for streaming when telemetry is active
         from llama_stack.providers.utils.telemetry.tracing import get_current_span
@@ -335,6 +336,7 @@ class LiteLLMOpenAIMixin(
             user=user,
             api_key=self.get_api_key(),
             api_base=self.api_base,
+            **kwargs,
         )
         return await litellm.acompletion(**params)
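
Net effect: extra keyword arguments now flow through LiteLLMOpenAIMixin.openai_chat_completion into litellm.acompletion instead of being rejected by the typed signature. A hedged usage sketch; the adapter variable and the extra_headers value are illustrative assumptions, not part of the diff:

    # Hypothetical call site: keyword args not named in the signature are
    # collected by **kwargs and forwarded to litellm.acompletion via params.
    response = await adapter.openai_chat_completion(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "hello"}],
        extra_headers={"x-request-id": "abc123"},  # assumed pass-through option
    )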