From 2bc96613f918316a5df85925b0c7872127947cae Mon Sep 17 00:00:00 2001
From: Francisco Arceo
Date: Mon, 21 Jul 2025 22:53:32 -0400
Subject: [PATCH] chore: Adding demo script and importing it into the docs
 (#2848)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?
This PR adds the quickstart as a file to the docs so that it can be more
easily maintained and run, as mentioned in
https://github.com/meta-llama/llama-stack/pull/2800.

## Test Plan
I could add this as a test in the CI but I wasn't sure if we wanted to add
additional jobs there. 😅

Signed-off-by: Francisco Javier Arceo
---
 docs/source/getting_started/demo_script.py | 62 ++++++++++++++++++++++
 docs/source/getting_started/quickstart.md  | 59 +-------------------
 2 files changed, 64 insertions(+), 57 deletions(-)
 create mode 100644 docs/source/getting_started/demo_script.py

diff --git a/docs/source/getting_started/demo_script.py b/docs/source/getting_started/demo_script.py
new file mode 100644
index 000000000..298fd9899
--- /dev/null
+++ b/docs/source/getting_started/demo_script.py
@@ -0,0 +1,62 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient
+
+vector_db_id = "my_demo_vector_db"
+client = LlamaStackClient(base_url="http://localhost:8321")
+
+models = client.models.list()
+
+# Select the first LLM and first embedding models
+model_id = next(m for m in models if m.model_type == "llm").identifier
+embedding_model_id = (
+    em := next(m for m in models if m.model_type == "embedding")
+).identifier
+embedding_dimension = em.metadata["embedding_dimension"]
+
+_ = client.vector_dbs.register(
+    vector_db_id=vector_db_id,
+    embedding_model=embedding_model_id,
+    embedding_dimension=embedding_dimension,
+    provider_id="faiss",
+)
+source = "https://www.paulgraham.com/greatwork.html"
+print("rag_tool> Ingesting document:", source)
+document = RAGDocument(
+    document_id="document_1",
+    content=source,
+    mime_type="text/html",
+    metadata={},
+)
+client.tool_runtime.rag_tool.insert(
+    documents=[document],
+    vector_db_id=vector_db_id,
+    chunk_size_in_tokens=50,
+)
+agent = Agent(
+    client,
+    model=model_id,
+    instructions="You are a helpful assistant",
+    tools=[
+        {
+            "name": "builtin::rag/knowledge_search",
+            "args": {"vector_db_ids": [vector_db_id]},
+        }
+    ],
+)
+
+prompt = "How do you do great work?"
+print("prompt>", prompt)
+
+response = agent.create_turn(
+    messages=[{"role": "user", "content": prompt}],
+    session_id=agent.create_session("rag_session"),
+    stream=True,
+)
+
+for log in AgentEventLogger().log(response):
+    log.print()
diff --git a/docs/source/getting_started/quickstart.md b/docs/source/getting_started/quickstart.md
index 59791643d..5549f412c 100644
--- a/docs/source/getting_started/quickstart.md
+++ b/docs/source/getting_started/quickstart.md
@@ -24,63 +24,8 @@ ENABLE_OLLAMA=ollama OLLAMA_INFERENCE_MODEL=llama3.2:3b uv run --with llama-stac
 #### Step 3: Run the demo
 Now open up a new terminal and copy the following script into a file named `demo_script.py`.
 
-```python
-from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient
-
-vector_db_id = "my_demo_vector_db"
-client = LlamaStackClient(base_url="http://localhost:8321")
-
-models = client.models.list()
-
-# Select the first LLM and first embedding models
-model_id = next(m for m in models if m.model_type == "llm").identifier
-embedding_model_id = (
-    em := next(m for m in models if m.model_type == "embedding")
-).identifier
-embedding_dimension = em.metadata["embedding_dimension"]
-
-_ = client.vector_dbs.register(
-    vector_db_id=vector_db_id,
-    embedding_model=embedding_model_id,
-    embedding_dimension=embedding_dimension,
-    provider_id="faiss",
-)
-source = "https://www.paulgraham.com/greatwork.html"
-print("rag_tool> Ingesting document:", source)
-document = RAGDocument(
-    document_id="document_1",
-    content=source,
-    mime_type="text/html",
-    metadata={},
-)
-client.tool_runtime.rag_tool.insert(
-    documents=[document],
-    vector_db_id=vector_db_id,
-    chunk_size_in_tokens=50,
-)
-agent = Agent(
-    client,
-    model=model_id,
-    instructions="You are a helpful assistant",
-    tools=[
-        {
-            "name": "builtin::rag/knowledge_search",
-            "args": {"vector_db_ids": [vector_db_id]},
-        }
-    ],
-)
-
-prompt = "How do you do great work?"
-print("prompt>", prompt)
-
-response = agent.create_turn(
-    messages=[{"role": "user", "content": prompt}],
-    session_id=agent.create_session("rag_session"),
-    stream=True,
-)
-
-for log in AgentEventLogger().log(response):
-    log.print()
+```{literalinclude} ./demo_script.py
+:language: python
 ```
 We will use `uv` to run the script
 ```
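
A quick way to sanity-check this patch locally before running the demo: the sketch below is not part of the diff and uses only calls that already appear in `demo_script.py` (`LlamaStackClient`, `models.list()`, `model_type`, `identifier`). It assumes the Step 2 server is listening on `http://localhost:8321` and verifies that both an LLM and an embedding model are registered, which the script's `next(...)` lookups require.

```python
# Pre-flight check for demo_script.py (a sketch, not part of this diff).
# Assumes the Llama Stack server from Step 2 is up on http://localhost:8321.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# demo_script.py grabs the first "llm" and the first "embedding" model via
# next(...), which raises StopIteration if either kind is missing; list what
# the server has registered before running the demo.
models = client.models.list()
for m in models:
    print(f"{m.model_type:>10}  {m.identifier}")

kinds = {m.model_type for m in models}
assert {"llm", "embedding"} <= kinds, "need one LLM and one embedding model"
```

If both kinds show up, running the script the way the quickstart suggests, e.g. `uv run --with llama-stack-client demo_script.py`, should exercise the demo end to end; that exact invocation is an assumption here, since the quickstart's run command falls outside this hunk's context, so defer to the rendered quickstart page.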