mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 04:04:14 +00:00
implement full-passthrough in the server
This commit is contained in:
parent
38fd76f85c
commit
9dafa6ad94
8 changed files with 69 additions and 71 deletions
|
@ -75,7 +75,7 @@ class InferenceClient(Inference):
|
|||
async def run_main(host: str, port: int, stream: bool):
|
||||
client = InferenceClient(f"http://{host}:{port}")
|
||||
|
||||
message = UserMessage(content="hello world, help me out here")
|
||||
message = UserMessage(content="hello world, troll me in two-paragraphs about 42")
|
||||
cprint(f"User>{message.content}", "green")
|
||||
iterator = client.chat_completion(
|
||||
ChatCompletionRequest(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue