Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-22 20:40:00 +00:00)
chore: Adding demo script and importing it into the docs
Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
parent 89c49eb003
commit a38770edab
2 changed files with 64 additions and 57 deletions
docs/source/getting_started/demo_script.py (new file, 62 lines)
@@ -0,0 +1,62 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient
+
+vector_db_id = "my_demo_vector_db"
+client = LlamaStackClient(base_url="http://localhost:8321")
+
+models = client.models.list()
+
+# Select the first LLM and the first embedding model
+model_id = next(m for m in models if m.model_type == "llm").identifier
+embedding_model_id = (
+    em := next(m for m in models if m.model_type == "embedding")
+).identifier
+embedding_dimension = em.metadata["embedding_dimension"]
+
+_ = client.vector_dbs.register(
+    vector_db_id=vector_db_id,
+    embedding_model=embedding_model_id,
+    embedding_dimension=embedding_dimension,
+    provider_id="faiss",
+)
+source = "https://www.paulgraham.com/greatwork.html"
+print("rag_tool> Ingesting document:", source)
+document = RAGDocument(
+    document_id="document_1",
+    content=source,
+    mime_type="text/html",
+    metadata={},
+)
+client.tool_runtime.rag_tool.insert(
+    documents=[document],
+    vector_db_id=vector_db_id,
+    chunk_size_in_tokens=50,
+)
+agent = Agent(
+    client,
+    model=model_id,
+    instructions="You are a helpful assistant",
+    tools=[
+        {
+            "name": "builtin::rag/knowledge_search",
+            "args": {"vector_db_ids": [vector_db_id]},
+        }
+    ],
+)
+
+prompt = "How do you do great work?"
+print("prompt>", prompt)
+
+response = agent.create_turn(
+    messages=[{"role": "user", "content": prompt}],
+    session_id=agent.create_session("rag_session"),
+    stream=True,
+)
+
+for log in AgentEventLogger().log(response):
+    log.print()
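The new demo script above registers a vector DB, ingests one document, and asks a single question in a fresh session. As a hedged sketch that is not part of this commit, the same session could be reused for follow-up questions; it assumes the `agent` object and `AgentEventLogger` import from demo_script.py and only the `create_session` and `create_turn` calls already shown there (the second prompt is made up for illustration):

```python
# Sketch only (not in this commit): reuse one session so the agent keeps context
# across turns. Assumes `agent` and AgentEventLogger from demo_script.py above.
session_id = agent.create_session("rag_session")

for prompt in [
    "How do you do great work?",
    "What does the essay say about choosing what to work on?",  # illustrative follow-up
]:
    print("prompt>", prompt)
    response = agent.create_turn(
        messages=[{"role": "user", "content": prompt}],
        session_id=session_id,
        stream=True,
    )
    for log in AgentEventLogger().log(response):
        log.print()
```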
@@ -24,63 +24,8 @@ ENABLE_OLLAMA=ollama OLLAMA_INFERENCE_MODEL=llama3.2:3b uv run --with llama-stac
 #### Step 3: Run the demo
 Now open up a new terminal and copy the following script into a file named `demo_script.py`.
 
-```python
-from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient
-
-vector_db_id = "my_demo_vector_db"
-client = LlamaStackClient(base_url="http://localhost:8321")
-
-models = client.models.list()
-
-# Select the first LLM and first embedding models
-model_id = next(m for m in models if m.model_type == "llm").identifier
-embedding_model_id = (
-    em := next(m for m in models if m.model_type == "embedding")
-).identifier
-embedding_dimension = em.metadata["embedding_dimension"]
-
-_ = client.vector_dbs.register(
-    vector_db_id=vector_db_id,
-    embedding_model=embedding_model_id,
-    embedding_dimension=embedding_dimension,
-    provider_id="faiss",
-)
-source = "https://www.paulgraham.com/greatwork.html"
-print("rag_tool> Ingesting document:", source)
-document = RAGDocument(
-    document_id="document_1",
-    content=source,
-    mime_type="text/html",
-    metadata={},
-)
-client.tool_runtime.rag_tool.insert(
-    documents=[document],
-    vector_db_id=vector_db_id,
-    chunk_size_in_tokens=50,
-)
-agent = Agent(
-    client,
-    model=model_id,
-    instructions="You are a helpful assistant",
-    tools=[
-        {
-            "name": "builtin::rag/knowledge_search",
-            "args": {"vector_db_ids": [vector_db_id]},
-        }
-    ],
-)
-
-prompt = "How do you do great work?"
-print("prompt>", prompt)
-
-response = agent.create_turn(
-    messages=[{"role": "user", "content": prompt}],
-    session_id=agent.create_session("rag_session"),
-    stream=True,
-)
-
-for log in AgentEventLogger().log(response):
-    log.print()
+```{literalinclude} ./demo_script.py
+:language: python
 ```
 We will use `uv` to run the script
 ```
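The `uv` command itself sits just past the end of this hunk, so it is not shown here. Independent of how the script is launched, a small preflight sketch (an assumption, not part of the commit) can confirm the server at http://localhost:8321 actually exposes the two model types demo_script.py needs; it uses only `LlamaStackClient` and `client.models.list()` as in the script itself:

```python
# Sketch only (not in this commit): verify the Llama Stack server is reachable
# and serves at least one LLM and one embedding model before running the demo.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")
models = client.models.list()

llms = [m.identifier for m in models if m.model_type == "llm"]
embedding_models = [m.identifier for m in models if m.model_type == "embedding"]

print("LLMs available:", llms)
print("Embedding models available:", embedding_models)

if not llms or not embedding_models:
    raise SystemExit("demo_script.py needs at least one LLM and one embedding model")
```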