models routing work

This commit is contained in:
Xi Yan 2024-09-19 08:48:10 -07:00
parent f3ff3a3001
commit 9bdd4e3dd9
3 changed files with 20 additions and 3 deletions

View file

@@ -98,7 +98,13 @@ class OllamaInferenceAdapter(Inference):
stream: Optional[bool] = False,
logprobs: Optional[LogProbConfig] = None,
) -> AsyncGenerator:
cprint("!! calling remote ollama !!", "red")
cprint("!! calling remote ollama {}, url={}!!".format(model, self.url), "red")
yield ChatCompletionResponseStreamChunk(
event=ChatCompletionResponseEvent(
event_type=ChatCompletionResponseEventType.start,
delta="",
)
)
# request = ChatCompletionRequest(
# model=model,
# messages=messages,