mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-23 21:04:29 +00:00
chore: Adding demo script and importing it into the docs (#2848)
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
Coverage Badge / unit-tests (push) Failing after 6s
Integration Tests / discover-tests (push) Successful in 7s
Unit Tests / unit-tests (3.13) (push) Failing after 6s
Test Llama Stack Build / build-single-provider (push) Failing after 8s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 11s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 14s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 9s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 11s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 13s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 14s
Test Llama Stack Build / generate-matrix (push) Successful in 10s
Test External Providers / test-external-providers (venv) (push) Failing after 9s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 11s
Unit Tests / unit-tests (3.12) (push) Failing after 10s
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 12s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 15s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 19s
Python Package Build Test / build (3.13) (push) Failing after 12s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 19s
Integration Tests / test-matrix (push) Failing after 13s
Python Package Build Test / build (3.12) (push) Failing after 1m1s
Update ReadTheDocs / update-readthedocs (push) Failing after 1m0s
Test Llama Stack Build / build (push) Failing after 52s
Pre-commit / pre-commit (push) Successful in 2m39s
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
Coverage Badge / unit-tests (push) Failing after 6s
Integration Tests / discover-tests (push) Successful in 7s
Unit Tests / unit-tests (3.13) (push) Failing after 6s
Test Llama Stack Build / build-single-provider (push) Failing after 8s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 11s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 14s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 9s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 11s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 13s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 14s
Test Llama Stack Build / generate-matrix (push) Successful in 10s
Test External Providers / test-external-providers (venv) (push) Failing after 9s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 11s
Unit Tests / unit-tests (3.12) (push) Failing after 10s
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 12s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 15s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 19s
Python Package Build Test / build (3.13) (push) Failing after 12s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 19s
Integration Tests / test-matrix (push) Failing after 13s
Python Package Build Test / build (3.12) (push) Failing after 1m1s
Update ReadTheDocs / update-readthedocs (push) Failing after 1m0s
Test Llama Stack Build / build (push) Failing after 52s
Pre-commit / pre-commit (push) Successful in 2m39s
# What does this PR do? This PR adds the quickstart as a file to the docs so that it can be more easily maintained and run, as mentioned in https://github.com/meta-llama/llama-stack/pull/2800. ## Test Plan I could add this as a test in the CI but I wasn't sure if we wanted to add additional jobs there. 😅 Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
parent
c8f274347d
commit
2bc96613f9
2 changed files with 64 additions and 57 deletions
62
docs/source/getting_started/demo_script.py
Normal file
62
docs/source/getting_started/demo_script.py
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Llama Stack quickstart demo.

Connects to a locally running Llama Stack server, registers a FAISS-backed
vector DB, ingests one HTML document via the built-in RAG tool, and then runs
a streaming agent turn that answers a question using knowledge search.

Requires a Llama Stack server listening on http://localhost:8321 with at least
one LLM and one embedding model registered.
"""

from llama_stack_client import Agent, AgentEventLogger, LlamaStackClient, RAGDocument

vector_db_id = "my_demo_vector_db"
client = LlamaStackClient(base_url="http://localhost:8321")

models = client.models.list()

# Select the first LLM and first embedding model advertised by the server.
# A plain assignment (instead of a walrus leaking out of an expression) keeps
# the embedding model object available for its metadata below.
try:
    model_id = next(m for m in models if m.model_type == "llm").identifier
    embedding_model = next(m for m in models if m.model_type == "embedding")
except StopIteration:
    raise RuntimeError(
        "The server must have at least one LLM and one embedding model registered."
    ) from None
embedding_model_id = embedding_model.identifier
embedding_dimension = embedding_model.metadata["embedding_dimension"]

# Register (or re-register) the demo vector DB backed by the inline FAISS provider.
_ = client.vector_dbs.register(
    vector_db_id=vector_db_id,
    embedding_model=embedding_model_id,
    embedding_dimension=embedding_dimension,
    provider_id="faiss",
)
source = "https://www.paulgraham.com/greatwork.html"
print("rag_tool> Ingesting document:", source)
document = RAGDocument(
    document_id="document_1",
    content=source,
    mime_type="text/html",
    metadata={},
)
# The RAG tool fetches, chunks (50 tokens per chunk), embeds, and stores the doc.
client.tool_runtime.rag_tool.insert(
    documents=[document],
    vector_db_id=vector_db_id,
    chunk_size_in_tokens=50,
)
# Agent with the built-in knowledge-search tool scoped to our vector DB.
agent = Agent(
    client,
    model=model_id,
    instructions="You are a helpful assistant",
    tools=[
        {
            "name": "builtin::rag/knowledge_search",
            "args": {"vector_db_ids": [vector_db_id]},
        }
    ],
)

prompt = "How do you do great work?"
print("prompt>", prompt)

# Stream the turn so tool invocations and tokens print as they arrive.
response = agent.create_turn(
    messages=[{"role": "user", "content": prompt}],
    session_id=agent.create_session("rag_session"),
    stream=True,
)

for log in AgentEventLogger().log(response):
    log.print()
|
|
@ -24,63 +24,8 @@ ENABLE_OLLAMA=ollama OLLAMA_INFERENCE_MODEL=llama3.2:3b uv run --with llama-stac
|
||||||
#### Step 3: Run the demo
|
#### Step 3: Run the demo
|
||||||
Now open up a new terminal and copy the following script into a file named `demo_script.py`.
|
Now open up a new terminal and copy the following script into a file named `demo_script.py`.
|
||||||
|
|
||||||
```python
|
```{literalinclude} ./demo_script.py
|
||||||
from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient
|
:language: python
|
||||||
|
|
||||||
vector_db_id = "my_demo_vector_db"
|
|
||||||
client = LlamaStackClient(base_url="http://localhost:8321")
|
|
||||||
|
|
||||||
models = client.models.list()
|
|
||||||
|
|
||||||
# Select the first LLM and first embedding models
|
|
||||||
model_id = next(m for m in models if m.model_type == "llm").identifier
|
|
||||||
embedding_model_id = (
|
|
||||||
em := next(m for m in models if m.model_type == "embedding")
|
|
||||||
).identifier
|
|
||||||
embedding_dimension = em.metadata["embedding_dimension"]
|
|
||||||
|
|
||||||
_ = client.vector_dbs.register(
|
|
||||||
vector_db_id=vector_db_id,
|
|
||||||
embedding_model=embedding_model_id,
|
|
||||||
embedding_dimension=embedding_dimension,
|
|
||||||
provider_id="faiss",
|
|
||||||
)
|
|
||||||
source = "https://www.paulgraham.com/greatwork.html"
|
|
||||||
print("rag_tool> Ingesting document:", source)
|
|
||||||
document = RAGDocument(
|
|
||||||
document_id="document_1",
|
|
||||||
content=source,
|
|
||||||
mime_type="text/html",
|
|
||||||
metadata={},
|
|
||||||
)
|
|
||||||
client.tool_runtime.rag_tool.insert(
|
|
||||||
documents=[document],
|
|
||||||
vector_db_id=vector_db_id,
|
|
||||||
chunk_size_in_tokens=50,
|
|
||||||
)
|
|
||||||
agent = Agent(
|
|
||||||
client,
|
|
||||||
model=model_id,
|
|
||||||
instructions="You are a helpful assistant",
|
|
||||||
tools=[
|
|
||||||
{
|
|
||||||
"name": "builtin::rag/knowledge_search",
|
|
||||||
"args": {"vector_db_ids": [vector_db_id]},
|
|
||||||
}
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
prompt = "How do you do great work?"
|
|
||||||
print("prompt>", prompt)
|
|
||||||
|
|
||||||
response = agent.create_turn(
|
|
||||||
messages=[{"role": "user", "content": prompt}],
|
|
||||||
session_id=agent.create_session("rag_session"),
|
|
||||||
stream=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
for log in AgentEventLogger().log(response):
|
|
||||||
log.print()
|
|
||||||
```
|
```
|
||||||
We will use `uv` to run the script
|
We will use `uv` to run the script
|
||||||
```
|
```
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue