Updated with vLLM-based values

rh-pre-commit.version: 2.3.2
rh-pre-commit.check-secrets: ENABLED
This commit is contained in:
Antony Sallas 2025-10-22 18:20:32 +08:00
parent a701f68bd7
commit 17e74251e2
11 changed files with 551 additions and 102 deletions

examples/inference.py Normal file

@@ -0,0 +1,26 @@
from llama_stack_client import LlamaStackClient

# Point the client at a local Llama Stack server and attach custom
# headers so requests can be attributed in telemetry.
client = LlamaStackClient(
    base_url="http://localhost:8321",
    default_headers={
        "X-Telemetry-Service": "llama-stack-inference",
        "X-Telemetry-Version": "1.0.0",
    },
)

# List available models
models = client.models.list()

# Select the first LLM served by the vLLM provider
llm = next(m for m in models if m.model_type == "llm" and m.provider_id == "vllm")
model_id = llm.identifier
print("Model:", model_id)

response = client.chat.completions.create(
    model=model_id,
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write a haiku about coding"},
    ],
)
print(response)
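
The final print emits the whole response object. As a minimal follow-up sketch, assuming chat.completions.create returns the usual OpenAI-compatible chat-completion shape (not confirmed by this diff), the reply text alone could be printed like this:

# Sketch under the assumption that the response follows the
# OpenAI chat-completion schema: the assistant's reply text
# lives at choices[0].message.content.
print(response.choices[0].message.content)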