mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-23 21:04:29 +00:00
chore: Adding demo script and importing it into the docs (#2848)
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
Coverage Badge / unit-tests (push) Failing after 6s
Integration Tests / discover-tests (push) Successful in 7s
Unit Tests / unit-tests (3.13) (push) Failing after 6s
Test Llama Stack Build / build-single-provider (push) Failing after 8s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 11s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 14s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 9s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 11s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 13s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 14s
Test Llama Stack Build / generate-matrix (push) Successful in 10s
Test External Providers / test-external-providers (venv) (push) Failing after 9s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 11s
Unit Tests / unit-tests (3.12) (push) Failing after 10s
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 12s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 15s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 19s
Python Package Build Test / build (3.13) (push) Failing after 12s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 19s
Integration Tests / test-matrix (push) Failing after 13s
Python Package Build Test / build (3.12) (push) Failing after 1m1s
Update ReadTheDocs / update-readthedocs (push) Failing after 1m0s
Test Llama Stack Build / build (push) Failing after 52s
Pre-commit / pre-commit (push) Successful in 2m39s
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
Coverage Badge / unit-tests (push) Failing after 6s
Integration Tests / discover-tests (push) Successful in 7s
Unit Tests / unit-tests (3.13) (push) Failing after 6s
Test Llama Stack Build / build-single-provider (push) Failing after 8s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 11s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 14s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 9s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 11s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 13s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 14s
Test Llama Stack Build / generate-matrix (push) Successful in 10s
Test External Providers / test-external-providers (venv) (push) Failing after 9s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 11s
Unit Tests / unit-tests (3.12) (push) Failing after 10s
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 12s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 15s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 19s
Python Package Build Test / build (3.13) (push) Failing after 12s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 19s
Integration Tests / test-matrix (push) Failing after 13s
Python Package Build Test / build (3.12) (push) Failing after 1m1s
Update ReadTheDocs / update-readthedocs (push) Failing after 1m0s
Test Llama Stack Build / build (push) Failing after 52s
Pre-commit / pre-commit (push) Successful in 2m39s
# What does this PR do? This PR adds the quickstart as a file to the docs so that it can be more easily maintained and run, as mentioned in https://github.com/meta-llama/llama-stack/pull/2800. ## Test Plan I could add this as a test in the CI but I wasn't sure if we wanted to add additional jobs there. 😅 Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
parent
c8f274347d
commit
2bc96613f9
2 changed files with 64 additions and 57 deletions
62
docs/source/getting_started/demo_script.py
Normal file
62
docs/source/getting_started/demo_script.py
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

"""Llama Stack quickstart demo.

Connects to a locally running Llama Stack server, registers a FAISS-backed
vector DB, ingests one HTML document via the built-in RAG tool, and then runs
a streaming agent turn that answers a question using knowledge search.

Requires a Llama Stack server listening on http://localhost:8321 with at least
one LLM and one embedding model registered.
"""

from llama_stack_client import Agent, AgentEventLogger, LlamaStackClient, RAGDocument

vector_db_id = "my_demo_vector_db"
client = LlamaStackClient(base_url="http://localhost:8321")

models = client.models.list()

# Select the first LLM and first embedding model advertised by the server.
# A plain assignment (instead of a walrus leaking out of an expression) keeps
# the embedding model object available for its metadata below.
try:
    model_id = next(m for m in models if m.model_type == "llm").identifier
    embedding_model = next(m for m in models if m.model_type == "embedding")
except StopIteration:
    raise RuntimeError(
        "The server must have at least one LLM and one embedding model registered."
    ) from None
embedding_model_id = embedding_model.identifier
embedding_dimension = embedding_model.metadata["embedding_dimension"]

# Register (or re-register) the demo vector DB backed by the inline FAISS provider.
_ = client.vector_dbs.register(
    vector_db_id=vector_db_id,
    embedding_model=embedding_model_id,
    embedding_dimension=embedding_dimension,
    provider_id="faiss",
)
source = "https://www.paulgraham.com/greatwork.html"
print("rag_tool> Ingesting document:", source)
document = RAGDocument(
    document_id="document_1",
    content=source,
    mime_type="text/html",
    metadata={},
)
# The RAG tool fetches, chunks (50 tokens per chunk), embeds, and stores the doc.
client.tool_runtime.rag_tool.insert(
    documents=[document],
    vector_db_id=vector_db_id,
    chunk_size_in_tokens=50,
)
# Agent with the built-in knowledge-search tool scoped to our vector DB.
agent = Agent(
    client,
    model=model_id,
    instructions="You are a helpful assistant",
    tools=[
        {
            "name": "builtin::rag/knowledge_search",
            "args": {"vector_db_ids": [vector_db_id]},
        }
    ],
)

prompt = "How do you do great work?"
print("prompt>", prompt)

# Stream the turn so tool invocations and tokens print as they arrive.
response = agent.create_turn(
    messages=[{"role": "user", "content": prompt}],
    session_id=agent.create_session("rag_session"),
    stream=True,
)

for log in AgentEventLogger().log(response):
    log.print()
|
|
@ -24,63 +24,8 @@ ENABLE_OLLAMA=ollama OLLAMA_INFERENCE_MODEL=llama3.2:3b uv run --with llama-stac
|
||||||
#### Step 3: Run the demo
|
#### Step 3: Run the demo
|
||||||
Now open up a new terminal and copy the following script into a file named `demo_script.py`.
|
Now open up a new terminal and copy the following script into a file named `demo_script.py`.
|
||||||
|
|
||||||
```python
|
```{literalinclude} ./demo_script.py
|
||||||
from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient
|
:language: python
|
||||||
|
|
||||||
vector_db_id = "my_demo_vector_db"
|
|
||||||
client = LlamaStackClient(base_url="http://localhost:8321")
|
|
||||||
|
|
||||||
models = client.models.list()
|
|
||||||
|
|
||||||
# Select the first LLM and first embedding models
|
|
||||||
model_id = next(m for m in models if m.model_type == "llm").identifier
|
|
||||||
embedding_model_id = (
|
|
||||||
em := next(m for m in models if m.model_type == "embedding")
|
|
||||||
).identifier
|
|
||||||
embedding_dimension = em.metadata["embedding_dimension"]
|
|
||||||
|
|
||||||
_ = client.vector_dbs.register(
|
|
||||||
vector_db_id=vector_db_id,
|
|
||||||
embedding_model=embedding_model_id,
|
|
||||||
embedding_dimension=embedding_dimension,
|
|
||||||
provider_id="faiss",
|
|
||||||
)
|
|
||||||
source = "https://www.paulgraham.com/greatwork.html"
|
|
||||||
print("rag_tool> Ingesting document:", source)
|
|
||||||
document = RAGDocument(
|
|
||||||
document_id="document_1",
|
|
||||||
content=source,
|
|
||||||
mime_type="text/html",
|
|
||||||
metadata={},
|
|
||||||
)
|
|
||||||
client.tool_runtime.rag_tool.insert(
|
|
||||||
documents=[document],
|
|
||||||
vector_db_id=vector_db_id,
|
|
||||||
chunk_size_in_tokens=50,
|
|
||||||
)
|
|
||||||
agent = Agent(
|
|
||||||
client,
|
|
||||||
model=model_id,
|
|
||||||
instructions="You are a helpful assistant",
|
|
||||||
tools=[
|
|
||||||
{
|
|
||||||
"name": "builtin::rag/knowledge_search",
|
|
||||||
"args": {"vector_db_ids": [vector_db_id]},
|
|
||||||
}
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
prompt = "How do you do great work?"
|
|
||||||
print("prompt>", prompt)
|
|
||||||
|
|
||||||
response = agent.create_turn(
|
|
||||||
messages=[{"role": "user", "content": prompt}],
|
|
||||||
session_id=agent.create_session("rag_session"),
|
|
||||||
stream=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
for log in AgentEventLogger().log(response):
|
|
||||||
log.print()
|
|
||||||
```
|
```
|
||||||
We will use `uv` to run the script
|
We will use `uv` to run the script
|
||||||
```
|
```
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue