From 47a51289ca2bc91dd4af7a9838667f88bef76bd0 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Thu, 19 Sep 2024 08:56:52 -0700
Subject: [PATCH] update router client

---
 llama_stack/apis/inference/client.py              | 11 +++++++++++
 .../providers/adapters/inference/ollama/ollama.py |  7 +++++++
 2 files changed, 18 insertions(+)

diff --git a/llama_stack/apis/inference/client.py b/llama_stack/apis/inference/client.py
index f5321c628..600a1856c 100644
--- a/llama_stack/apis/inference/client.py
+++ b/llama_stack/apis/inference/client.py
@@ -98,6 +98,17 @@ async def run_main(host: str, port: int, stream: bool):
     async for log in EventLogger().log(iterator):
         log.print()
 
+    print("Testing a different model provider")
+    iterator = client.chat_completion(
+        ChatCompletionRequest(
+            model="ollama-1",
+            messages=[message],
+            stream=stream,
+        )
+    )
+    async for log in EventLogger().log(iterator):
+        log.print()
+
 
 def main(host: str, port: int, stream: bool = True):
     asyncio.run(run_main(host, port, stream))
diff --git a/llama_stack/providers/adapters/inference/ollama/ollama.py b/llama_stack/providers/adapters/inference/ollama/ollama.py
index bc7780c9d..2dd261f81 100644
--- a/llama_stack/providers/adapters/inference/ollama/ollama.py
+++ b/llama_stack/providers/adapters/inference/ollama/ollama.py
@@ -105,6 +105,13 @@ class OllamaInferenceAdapter(Inference):
                 delta="",
             )
         )
+
+        yield ChatCompletionResponseStreamChunk(
+            event=ChatCompletionResponseEvent(
+                event_type=ChatCompletionResponseEventType.progress,
+                delta="model={}, url={}".format(model, self.url),
+            )
+        )
         # request = ChatCompletionRequest(
         #     model=model,
         #     messages=messages,
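
Reviewer note: with this patch, the Ollama adapter emits a progress chunk identifying the serving model and URL before any completion tokens arrive, which lets a router client confirm which provider actually handled a request such as model="ollama-1". The sketch below shows how such a chunk surfaces to a stream consumer. It is illustrative only: fake_stream is a hypothetical stand-in for the adapter's stream, and the import path is an assumption based on the module layout visible in this patch.

# Illustrative sketch, not part of the patch. fake_stream is hypothetical;
# the import path is assumed to match the llama_stack layout shown above.
import asyncio

from llama_stack.apis.inference import (
    ChatCompletionResponseEvent,
    ChatCompletionResponseEventType,
    ChatCompletionResponseStreamChunk,
)


async def fake_stream():
    # Mirrors the debug chunk the Ollama adapter now yields at the start
    # of streaming (event_type=progress, delta carrying model and url).
    yield ChatCompletionResponseStreamChunk(
        event=ChatCompletionResponseEvent(
            event_type=ChatCompletionResponseEventType.progress,
            delta="model=ollama-1, url=http://localhost:11434",
        )
    )


async def consume():
    async for chunk in fake_stream():
        # A router client can inspect progress deltas to verify
        # which provider served the request.
        print(chunk.event.event_type, chunk.event.delta)


asyncio.run(consume())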