Updated with vLLM-based values

rh-pre-commit.version: 2.3.2
rh-pre-commit.check-secrets: ENABLED
This commit is contained in:
Antony Sallas 2025-10-22 18:20:32 +08:00
parent a701f68bd7
commit 17e74251e2
11 changed files with 551 additions and 102 deletions

examples/inference.py Normal file

@@ -0,0 +1,26 @@
from llama_stack_client import LlamaStackClient

# Point the client at a local Llama Stack server and attach custom
# headers so requests can be attributed in telemetry.
client = LlamaStackClient(
    base_url="http://localhost:8321",
    default_headers={
        "X-Telemetry-Service": "llama-stack-inference",
        "X-Telemetry-Version": "1.0.0",
    },
)

# List available models
models = client.models.list()

# Select the first LLM served by the vLLM provider
llm = next(m for m in models if m.model_type == "llm" and m.provider_id == "vllm")
model_id = llm.identifier
print("Model:", model_id)

response = client.chat.completions.create(
    model=model_id,
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write a haiku about coding"},
    ],
)
print(response)
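
The final print emits the whole response object. As a minimal follow-up sketch, assuming chat.completions.create returns the usual OpenAI-compatible chat-completion shape (not confirmed by this diff), the reply text alone could be printed like this:

# Sketch under the assumption that the response follows the
# OpenAI chat-completion schema: the assistant's reply text
# lives at choices[0].message.content.
print(response.choices[0].message.content)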