Mirror of https://github.com/meta-llama/llama-stack.git
Merge f229c433fe into sapling-pr-archive-ehhuang
Commit 3e9dd56af8
4 changed files with 5 additions and 0 deletions
@@ -173,5 +173,6 @@ class MetaReferenceInferenceImpl(
         top_logprobs: int | None = None,
         top_p: float | None = None,
         user: str | None = None,
+        **kwargs: Any,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
         raise NotImplementedError("OpenAI chat completion not supported by meta-reference inference provider")
@@ -124,5 +124,6 @@ class SentenceTransformersInferenceImpl(
         top_logprobs: int | None = None,
         top_p: float | None = None,
         user: str | None = None,
+        **kwargs: Any,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
         raise NotImplementedError("OpenAI chat completion not supported by sentence transformers provider")
@@ -186,5 +186,6 @@ class BedrockInferenceAdapter(
         top_logprobs: int | None = None,
         top_p: float | None = None,
         user: str | None = None,
+        **kwargs: Any,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
         raise NotImplementedError("OpenAI chat completion not supported by the Bedrock provider")
@@ -299,6 +299,7 @@ class LiteLLMOpenAIMixin(
         top_logprobs: int | None = None,
         top_p: float | None = None,
         user: str | None = None,
+        **kwargs: Any,
     ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
         # Add usage tracking for streaming when telemetry is active
         from llama_stack.providers.utils.telemetry.tracing import get_current_span
@@ -335,6 +336,7 @@ class LiteLLMOpenAIMixin(
             user=user,
             api_key=self.get_api_key(),
             api_base=self.api_base,
+            **kwargs,
         )
         return await litellm.acompletion(**params)
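Taken together, the five additions thread a **kwargs passthrough into each provider's openai_chat_completion signature; in the LiteLLM mixin the extra keyword arguments are folded into the request params and forwarded to litellm.acompletion, while the other providers still raise NotImplementedError but no longer reject unknown keyword arguments. A minimal sketch of that passthrough pattern, using a hypothetical standalone function (the name and return value below are illustrative, not the real mixin):

from typing import Any


async def openai_chat_completion_sketch(
    model: str,
    messages: list[dict[str, Any]],
    user: str | None = None,
    **kwargs: Any,  # passthrough for extra, provider-specific parameters
) -> dict[str, Any]:
    # Mirror the pattern from the diff: assemble the known fields first, then
    # splat the passthrough kwargs so they reach the backend call unchanged.
    params: dict[str, Any] = {
        "model": model,
        "messages": messages,
        "user": user,
        **kwargs,
    }
    # The real mixin would end with: return await litellm.acompletion(**params)
    # Returning params keeps this sketch self-contained and runnable.
    return params


# Example: an extra argument such as seed=7 now flows through to the backend
# request instead of raising TypeError at the adapter boundary.
# import asyncio
# asyncio.run(openai_chat_completion_sketch(
#     "some-model", [{"role": "user", "content": "hi"}], seed=7))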