# Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-11 19:56:03 +00:00)
# Demo: driving a Llama Stack agent in non-streaming and streaming modes.
from llama_stack_client import LlamaStackClient
|
|
from llama_stack_client import Agent, AgentEventLogger
|
|
from rich.pretty import pprint
|
|
import uuid
|
|
|
|
# Client for a Llama Stack server assumed to be running locally on port 8321.
# The default headers tag every request for telemetry attribution.
# Fix: the base_url was an f-string with no placeholders (ruff F541) — a plain
# string literal is equivalent and clearer.
client = LlamaStackClient(
    base_url="http://localhost:8321",
    default_headers={
        "X-Telemetry-Service": "llama-stack-agent",
        "X-Telemetry-Version": "1.0.0",
    },
)
# Pick the first LLM served by the vLLM provider.
# Fix: a bare next(...) raises an opaque StopIteration when no matching model
# exists; next(..., None) plus an explicit check gives a readable error instead.
models = client.models.list()
llm = next(
    (m for m in models if m.model_type == "llm" and m.provider_id == "vllm"),
    None,
)
if llm is None:
    raise RuntimeError("no LLM with provider_id 'vllm' is registered on the server")
model_id = llm.identifier
# Create one agent and one conversation session; the session name is made
# unique with a random hex suffix so repeated runs don't collide.
agent = Agent(client, model=model_id, instructions="You are a helpful assistant.")
session_name = f"s{uuid.uuid4().hex}"
s_id = agent.create_session(session_name=session_name)
# Blocking turn: the complete response object comes back from a single call.
print("Non-streaming ...")
user_message = {"role": "user", "content": "Who are you?"}
response = agent.create_turn(messages=[user_message], session_id=s_id, stream=False)
print("agent>", response.output_message.content)
# Streaming turn: iterate the raw event stream and dump each event with
# rich's pretty-printer.
print("Streaming ...")
event_stream = agent.create_turn(
    messages=[{"role": "user", "content": "Who are you?"}],
    session_id=s_id,
    stream=True,
)
for ev in event_stream:
    pprint(ev)
# Same streaming turn, but rendered through the AgentEventLogger helper,
# which formats each event for console output.
print("Streaming with print helper...")
event_stream = agent.create_turn(
    messages=[{"role": "user", "content": "Who are you?"}],
    session_id=s_id,
    stream=True,
)
event_logger = AgentEventLogger()
for log_event in event_logger.log(event_stream):
    log_event.print()