Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-08-15 14:08:00 +00:00
Merge branch 'main' into vector-store-chunks
Commit d764e35b2d
2 changed files with 4 additions and 5 deletions
@@ -158,9 +158,8 @@ class LiteLLMOpenAIMixin(
         params["model"] = self.get_litellm_model_name(params["model"])

         logger.debug(f"params to litellm (openai compat): {params}")
-        # unfortunately, we need to use synchronous litellm.completion here because litellm
-        # caches various httpx.client objects in a non-eventloop aware manner
-        response = litellm.completion(**params)
+        # see https://docs.litellm.ai/docs/completion/stream#async-completion
+        response = await litellm.acompletion(**params)
         if stream:
             return self._stream_chat_completion(response)
         else:
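The hunk above replaces the synchronous `litellm.completion` call, and the workaround comment that justified it, with `await litellm.acompletion`, presumably so the provider request no longer blocks the event loop. A minimal sketch of that async call pattern outside the mixin, assuming litellm is installed and an API key for the chosen provider is set in the environment; the model id and prompt are placeholders, not from this commit:

```python
import asyncio

import litellm


async def ask(prompt: str) -> str:
    # Non-blocking: other asyncio tasks keep running while litellm
    # awaits the provider's HTTP response.
    response = await litellm.acompletion(
        model="openai/gpt-4o-mini",  # placeholder model id, not from this commit
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content


if __name__ == "__main__":
    print(asyncio.run(ask("Say hello in one short sentence.")))
```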
@@ -170,7 +169,7 @@ class LiteLLMOpenAIMixin(
         self, response: litellm.ModelResponse
     ) -> AsyncIterator[ChatCompletionResponseStreamChunk]:
         async def _stream_generator():
-            for chunk in response:
+            async for chunk in response:
                 yield chunk

         async for chunk in convert_openai_chat_completion_stream(
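The second hunk follows from the first: with `stream=True`, `litellm.acompletion` returns an asynchronous stream, so the inner generator has to consume it with `async for` rather than a plain `for`. A small sketch of that streaming pattern on its own, under the same assumptions as above (placeholder model id, API key configured):

```python
import asyncio

import litellm


async def stream_reply(prompt: str) -> None:
    # With stream=True the awaited result is an async iterator of
    # OpenAI-style chunks, consumed with `async for` as in the hunk above.
    response = await litellm.acompletion(
        model="openai/gpt-4o-mini",  # placeholder model id
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )
    async for chunk in response:
        delta = chunk.choices[0].delta.content
        if delta:
            print(delta, end="", flush=True)
    print()


if __name__ == "__main__":
    asyncio.run(stream_reply("Name three colors."))
```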
@@ -78,7 +78,7 @@
     },
     {
       "role": "user",
-      "content": "What's the weather like in San Francisco?"
+      "content": "What's the weather like in San Francisco, CA?"
     }
   ],
   "tools": [
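The second changed file is not named in this view; the fragment looks like an example (or recorded) OpenAI-compatible tool-calling request, and only the user prompt text changes, from "San Francisco" to "San Francisco, CA". For orientation, a sketch of how a request of this shape could be issued through litellm; the `get_weather` tool definition below is hypothetical, since the diff does not show the actual `tools` entries:

```python
import asyncio
import json

import litellm

# Hypothetical tool definition; the diff does not show the real "tools" entries.
WEATHER_TOOL = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for a location.",
        "parameters": {
            "type": "object",
            "properties": {"location": {"type": "string"}},
            "required": ["location"],
        },
    },
}


async def main() -> None:
    response = await litellm.acompletion(
        model="openai/gpt-4o-mini",  # placeholder model id
        messages=[{"role": "user", "content": "What's the weather like in San Francisco, CA?"}],
        tools=[WEATHER_TOOL],
    )
    # With a tool-capable model, the reply typically carries a tool call
    # rather than plain text.
    tool_calls = response.choices[0].message.tool_calls or []
    for call in tool_calls:
        print(call.function.name, json.loads(call.function.arguments))


if __name__ == "__main__":
    asyncio.run(main())
```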