mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-17 10:59:48 +00:00
docs: make inference model configurable (#4385)
Allow users to specify the inference model through the INFERENCE_MODEL environment variable instead of hardcoding it, with fallback to ollama/llama3.2:3b if not set. Signed-off-by: Costa Shulyupin <costa.shul@redhat.com>
This commit is contained in:
parent
62f7818051
commit
2b85600a7e
1 changed file with 3 additions and 2 deletions
|
|
@@ -16,6 +16,7 @@ Run this script after starting a Llama Stack server:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import io
|
import io
|
||||||
|
import os
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
|
|
@@ -53,7 +54,7 @@ print("=" * 80)
|
||||||
print(f"Query: {query}\n")
|
print(f"Query: {query}\n")
|
||||||
|
|
||||||
resp = client.responses.create(
|
resp = client.responses.create(
|
||||||
model="ollama/llama3.2:3b", # feel free to change this to any other model
|
model=os.getenv("INFERENCE_MODEL", "ollama/llama3.2:3b"),
|
||||||
input=query,
|
input=query,
|
||||||
tools=[{"type": "file_search", "vector_store_ids": [vs.id]}],
|
tools=[{"type": "file_search", "vector_store_ids": [vs.id]}],
|
||||||
include=["file_search_call.results"],
|
include=["file_search_call.results"],
|
||||||
|
|
@@ -93,7 +94,7 @@ print(f"Found {len(context_chunks)} relevant chunks\n")
|
||||||
# Step 3: Use Chat Completions with retrieved context
|
# Step 3: Use Chat Completions with retrieved context
|
||||||
print("Generating response with chat completions...")
|
print("Generating response with chat completions...")
|
||||||
completion = client.chat.completions.create(
|
completion = client.chat.completions.create(
|
||||||
model="ollama/llama3.2:3b", # Feel free to change this to any other model
|
model=os.getenv("INFERENCE_MODEL", "ollama/llama3.2:3b"),
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue