feat: Add demo script and allow new website to source files from the repo

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>

# Conflicts:
#	docs/docs/getting_started/demo_script.py
#	docs/docs/getting_started/quickstart.mdx
Francisco Javier Arceo 2025-10-20 21:28:09 -04:00
parent c582654d70
commit 7ab63068f8
7 changed files with 444 additions and 23 deletions

@@ -13,6 +13,19 @@ npm run serve
 ```
 You can open up the docs in your browser at http://localhost:3000
 
+## File Import System
+
+This documentation uses a custom component to import files directly from the repository, eliminating copy-paste maintenance:
+
+```jsx
+import CodeFromFile from '@site/src/components/CodeFromFile';
+
+<CodeFromFile src="path/to/file.py" />
+<CodeFromFile src="README.md" startLine={1} endLine={20} />
+```
+
+Files are automatically synced from the repo root when building. See the `CodeFromFile` component for syntax highlighting, line ranges, and multi-language support.
+
 ## Content
 
 Try out Llama Stack's capabilities through our detailed Jupyter notebooks:
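As a concrete (and purely illustrative) companion to the snippet above, here is a hedged sketch combining the documented options; the path, line numbers, and highlight range are hypothetical, but the props match the `CodeFromFile` component added later in this commit:

```jsx
import CodeFromFile from '@site/src/components/CodeFromFile';

{/* Hypothetical path and ranges: render lines 10-40 of a repo file and
    highlight lines 3-5 of the rendered slice (passed via metastring). */}
<CodeFromFile
  src="path/to/module.py"
  title="module.py (lines 10-40)"
  startLine={10}
  endLine={40}
  highlightLines="3-5"
/>
```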

@@ -35,27 +35,9 @@ OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter
 #### Step 3: Run the demo
 Now open up a new terminal and copy the following script into a file named `demo_script.py`.
-```python
-import io, requests
-from openai import OpenAI
-
-url = "https://www.paulgraham.com/greatwork.html"
-client = OpenAI(base_url="http://localhost:8321/v1/", api_key="none")
-
-vs = client.vector_stores.create()
-response = requests.get(url)
-pseudo_file = io.BytesIO(str(response.content).encode('utf-8'))
-uploaded_file = client.files.create(file=(url, pseudo_file, "text/html"), purpose="assistants")
-client.vector_stores.files.create(vector_store_id=vs.id, file_id=uploaded_file.id)
-
-resp = client.responses.create(
-    model="openai/gpt-4o",
-    input="How do you do great work? Use the existing knowledge_search tool.",
-    tools=[{"type": "file_search", "vector_store_ids": [vs.id]}],
-    include=["file_search_call.results"],
-)
-```
+import CodeFromFile from '@site/src/components/CodeFromFile';
+
+<CodeFromFile src="demo_script.py" title="demo_script.py" />
 
 We will use `uv` to run the script
 ```
 uv run --with llama-stack-client,fire,requests demo_script.py
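For orientation, a minimal sketch of how the `src` prop above is resolved, based on the sync script and component added later in this commit:

```js
// Build time: scripts/sync-files.js copies <repoRoot>/<src> into
// docs/static/imported-files/<src> and records it in usage.json.
// Run time: the component fetches the synced copy as a static asset.
const src = 'demo_script.py';
const fetchUrl = `/imported-files/${src}`; // -> /imported-files/demo_script.py
```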

@@ -4,8 +4,8 @@
   "private": true,
   "scripts": {
     "docusaurus": "docusaurus",
-    "start": "docusaurus start",
-    "build": "docusaurus build",
+    "start": "npm run sync-files && docusaurus start",
+    "build": "npm run sync-files && docusaurus build",
     "swizzle": "docusaurus swizzle",
     "deploy": "docusaurus deploy",
     "clear": "docusaurus clear",
@@ -15,7 +15,8 @@
     "gen-api-docs": "docusaurus gen-api-docs",
     "clean-api-docs": "docusaurus clean-api-docs",
     "gen-api-docs:version": "docusaurus gen-api-docs:version",
-    "clean-api-docs:version": "docusaurus clean-api-docs:version"
+    "clean-api-docs:version": "docusaurus clean-api-docs:version",
+    "sync-files": "node scripts/sync-files.js"
   },
   "dependencies": {
     "@docusaurus/core": "3.8.1",

docs/scripts/sync-files.js Executable file

@@ -0,0 +1,93 @@
#!/usr/bin/env node
const fs = require('fs');
const path = require('path');

// Repository root is always one level up from docs
const repoRoot = path.join(__dirname, '..', '..');

// Get all requested files from the usage tracking file
function getRequestedFiles() {
  const usageFile = path.join(__dirname, '..', 'static', 'imported-files', 'usage.json');
  if (!fs.existsSync(usageFile)) {
    return [];
  }
  try {
    const usage = JSON.parse(fs.readFileSync(usageFile, 'utf8'));
    return usage.files || [];
  } catch (error) {
    console.warn('Could not read usage file:', error.message);
    return [];
  }
}

// Track file usage
function trackFileUsage(filePath) {
  const usageFile = path.join(__dirname, '..', 'static', 'imported-files', 'usage.json');
  const usageDir = path.dirname(usageFile);

  // Ensure directory exists
  if (!fs.existsSync(usageDir)) {
    fs.mkdirSync(usageDir, { recursive: true });
  }

  let usage = { files: [] };
  if (fs.existsSync(usageFile)) {
    try {
      usage = JSON.parse(fs.readFileSync(usageFile, 'utf8'));
    } catch (error) {
      console.warn('Could not read existing usage file, creating new one');
    }
  }

  if (!usage.files.includes(filePath)) {
    usage.files.push(filePath);
    fs.writeFileSync(usageFile, JSON.stringify(usage, null, 2));
  }
}

// Sync a file from repo root to static directory
function syncFile(filePath) {
  const sourcePath = path.join(repoRoot, filePath);
  const destPath = path.join(__dirname, '..', 'static', 'imported-files', filePath);
  const destDir = path.dirname(destPath);

  // Ensure destination directory exists
  if (!fs.existsSync(destDir)) {
    fs.mkdirSync(destDir, { recursive: true });
  }

  try {
    if (fs.existsSync(sourcePath)) {
      const content = fs.readFileSync(sourcePath, 'utf8');
      fs.writeFileSync(destPath, content);
      console.log(`✅ Synced ${filePath}`);
      trackFileUsage(filePath);
      return true;
    } else {
      console.warn(`⚠️ Source file not found: ${sourcePath}`);
      return false;
    }
  } catch (error) {
    console.error(`❌ Error syncing ${filePath}:`, error.message);
    return false;
  }
}

// Main execution
console.log(`📁 Repository root: ${path.resolve(repoRoot)}`);

// Get files that are being requested by the documentation
const requestedFiles = getRequestedFiles();
console.log(`📄 Syncing ${requestedFiles.length} requested files...`);

if (requestedFiles.length === 0) {
  console.log('   No files requested yet. Files will be synced when first referenced in documentation.');
} else {
  requestedFiles.forEach(filePath => {
    syncFile(filePath);
  });
}

console.log('✅ File sync complete!');
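Note the bootstrap gap: the script only copies files already recorded in `usage.json`, so a fresh checkout may sync nothing on its first pass. A minimal sketch of seeding the tracking file by hand, assuming the layout above and a working directory of `docs/`:

```js
const fs = require('fs');

// Seed static/imported-files/usage.json with one entry; the next
// `node scripts/sync-files.js` run will then copy README.md into place.
fs.mkdirSync('static/imported-files', { recursive: true });
fs.writeFileSync(
  'static/imported-files/usage.json',
  JSON.stringify({ files: ['README.md'] }, null, 2)
);
```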


@ -0,0 +1,119 @@
import React, { useState, useEffect } from 'react';
import CodeBlock from '@theme/CodeBlock';
export default function CodeFromFile({
src,
language = 'python',
title,
startLine,
endLine,
highlightLines
}) {
const [content, setContent] = useState('');
const [error, setError] = useState(null);
useEffect(() => {
async function loadFile() {
try {
// Register this file for syncing (build-time only)
if (typeof window === 'undefined') {
// This runs during build - register the file
const fs = require('fs');
const path = require('path');
const usageFile = path.join(process.cwd(), 'static', 'imported-files', 'usage.json');
const usageDir = path.dirname(usageFile);
if (!fs.existsSync(usageDir)) {
fs.mkdirSync(usageDir, { recursive: true });
}
let usage = { files: [] };
if (fs.existsSync(usageFile)) {
try {
usage = JSON.parse(fs.readFileSync(usageFile, 'utf8'));
} catch (error) {
console.warn('Could not read existing usage file');
}
}
if (!usage.files.includes(src)) {
usage.files.push(src);
fs.writeFileSync(usageFile, JSON.stringify(usage, null, 2));
}
}
// Load file from static/imported-files directory
const response = await fetch(`/imported-files/${src}`);
if (!response.ok) {
throw new Error(`Failed to fetch: ${response.status}`);
}
let text = await response.text();
// Handle line range if specified
if (startLine || endLine) {
const lines = text.split('\n');
const start = startLine ? Math.max(0, startLine - 1) : 0;
const end = endLine ? Math.min(lines.length, endLine) : lines.length;
text = lines.slice(start, end).join('\n');
}
setContent(text);
} catch (err) {
console.error('Failed to load file:', err);
setError(`Failed to load ${src}: ${err.message}`);
}
}
loadFile();
}, [src, startLine, endLine]);
if (error) {
return <div style={{ color: 'red', padding: '1rem', border: '1px solid red', borderRadius: '4px' }}>
Error: {error}
</div>;
}
if (!content) {
return <div>Loading {src}...</div>;
}
// Auto-detect language from file extension if not provided
const detectedLanguage = language || getLanguageFromExtension(src);
return (
<CodeBlock
language={detectedLanguage}
title={title || src}
metastring={highlightLines ? `{${highlightLines}}` : undefined}
>
{content}
</CodeBlock>
);
}
function getLanguageFromExtension(filename) {
const ext = filename.split('.').pop();
const languageMap = {
'py': 'python',
'js': 'javascript',
'jsx': 'jsx',
'ts': 'typescript',
'tsx': 'tsx',
'md': 'markdown',
'sh': 'bash',
'yaml': 'yaml',
'yml': 'yaml',
'json': 'json',
'css': 'css',
'html': 'html',
'cpp': 'cpp',
'c': 'c',
'java': 'java',
'go': 'go',
'rs': 'rust',
'php': 'php',
'rb': 'ruby',
};
return languageMap[ext] || 'text';
}
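Since `language` is optional, extension-based detection normally applies. An illustrative use, assuming the target file has already been registered and synced:

```jsx
import CodeFromFile from '@site/src/components/CodeFromFile';

{/* No language prop: getLanguageFromExtension('sync-files.js') resolves
    to 'javascript'; the title falls back to src when omitted. */}
<CodeFromFile src="docs/scripts/sync-files.js" title="sync-files.js" />
```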

docs/static/imported-files/README.md vendored Normal file

@@ -0,0 +1,207 @@
# Llama Stack
[![PyPI version](https://img.shields.io/pypi/v/llama_stack.svg)](https://pypi.org/project/llama_stack/)
[![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-stack)](https://pypi.org/project/llama-stack/)
[![License](https://img.shields.io/pypi/l/llama_stack.svg)](https://github.com/meta-llama/llama-stack/blob/main/LICENSE)
[![Discord](https://img.shields.io/discord/1257833999603335178?color=6A7EC2&logo=discord&logoColor=ffffff)](https://discord.gg/llama-stack)
[![Unit Tests](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
[![Integration Tests](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)
[**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
### ✨🎉 Llama 4 Support 🎉✨
We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta.
<details>
<summary>👋 Click here to see how to run Llama 4 models on Llama Stack </summary>
*Note: you need an 8xH100 GPU host to run these models.*
```bash
pip install -U llama_stack
MODEL="Llama-4-Scout-17B-16E-Instruct"
# get meta url from llama.com
huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL
# start a llama stack server
INFERENCE_MODEL=meta-llama/$MODEL llama stack build --run --template meta-reference-gpu
# install client to interact with the server
pip install llama-stack-client
```
### CLI
```bash
# Run a chat completion
MODEL="Llama-4-Scout-17B-16E-Instruct"
llama-stack-client --endpoint http://localhost:8321 \
inference chat-completion \
--model-id meta-llama/$MODEL \
--message "write a haiku for meta's llama 4 models"
OpenAIChatCompletion(
...
choices=[
OpenAIChatCompletionChoice(
finish_reason='stop',
index=0,
message=OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam(
role='assistant',
content='...**Silent minds awaken,** \n**Whispers of billions of words,** \n**Reasoning breaks the night.** \n\n— \n*This haiku blends the essence of LLaMA 4\'s capabilities with nature-inspired metaphor, evoking its vast training data and transformative potential.*',
...
),
...
)
],
...
)
```
### Python SDK
```python
from llama_stack_client import LlamaStackClient
client = LlamaStackClient(base_url="http://localhost:8321")
model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
prompt = "Write a haiku about coding"
print(f"User> {prompt}")
response = client.chat.completions.create(
model=model_id,
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt},
],
)
print(f"Assistant> {response.choices[0].message.content}")
```
As more providers start supporting Llama 4, you can use them in Llama Stack as well. We are adding to the list. Stay tuned!
</details>
### 🚀 One-Line Installer 🚀
To try Llama Stack locally, run:
```bash
curl -LsSf https://github.com/meta-llama/llama-stack/raw/main/scripts/install.sh | bash
```
### Overview
Llama Stack standardizes the core building blocks that simplify AI application development. It codifies best practices across the Llama ecosystem. More specifically, it provides
- **Unified API layer** for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry.
- **Plugin architecture** to support the rich ecosystem of different API implementations in various environments, including local development, on-premises, cloud, and mobile.
- **Prepackaged verified distributions** which offer a one-stop solution for developers to get started quickly and reliably in any environment.
- **Multiple developer interfaces** like CLI and SDKs for Python, Typescript, iOS, and Android.
- **Standalone applications** as examples for how to build production-grade AI applications with Llama Stack.
<div style="text-align: center;">
<img
src="https://github.com/user-attachments/assets/33d9576d-95ea-468d-95e2-8fa233205a50"
width="480"
title="Llama Stack"
alt="Llama Stack"
/>
</div>
### Llama Stack Benefits
- **Flexible Options**: Developers can choose their preferred infrastructure without changing APIs and enjoy flexible deployment choices.
- **Consistent Experience**: With its unified APIs, Llama Stack makes it easier to build, test, and deploy AI applications with consistent application behavior.
- **Robust Ecosystem**: Llama Stack is already integrated with distribution partners (cloud providers, hardware vendors, and AI-focused companies) that offer tailored infrastructure, software, and services for deploying Llama models.
By reducing friction and complexity, Llama Stack empowers developers to focus on what they do best: building transformative generative AI applications.
### API Providers
Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack.
See the [full list](https://llamastack.github.io/docs/providers) of providers.
| API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Telemetry | Post Training | Eval | DatasetIO |
|:--------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:--------:|
| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| SambaNova | Hosted | | ✅ | | ✅ | | | | |
| Cerebras | Hosted | | ✅ | | | | | | |
| Fireworks | Hosted | ✅ | ✅ | ✅ | | | | | |
| AWS Bedrock | Hosted | | ✅ | | ✅ | | | | |
| Together | Hosted | ✅ | ✅ | | ✅ | | | | |
| Groq | Hosted | | ✅ | | | | | | |
| Ollama | Single Node | | ✅ | | | | | | |
| TGI | Hosted/Single Node | | ✅ | | | | | | |
| NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | | |
| ChromaDB | Hosted/Single Node | | | ✅ | | | | | |
| Milvus | Hosted/Single Node | | | ✅ | | | | | |
| Qdrant | Hosted/Single Node | | | ✅ | | | | | |
| Weaviate | Hosted/Single Node | | | ✅ | | | | | |
| SQLite-vec | Single Node | | | ✅ | | | | | |
| PG Vector | Single Node | | | ✅ | | | | | |
| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | | |
| vLLM | Single Node | | ✅ | | | | | | |
| OpenAI | Hosted | | ✅ | | | | | | |
| Anthropic | Hosted | | ✅ | | | | | | |
| Gemini | Hosted | | ✅ | | | | | | |
| WatsonX | Hosted | | ✅ | | | | | | |
| HuggingFace | Single Node | | | | | | ✅ | | ✅ |
| TorchTune | Single Node | | | | | | ✅ | | |
| NVIDIA NEMO | Hosted | | ✅ | ✅ | | | ✅ | ✅ | ✅ |
| NVIDIA | Hosted | | | | | | ✅ | ✅ | ✅ |
> **Note**: Additional providers are available through external packages. See [External Providers](https://llamastack.github.io/docs/providers/external) documentation.
### Distributions
A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (e.g., Ollama) and seamlessly transition to production (e.g., Fireworks) without changing your application code.
Here are some of the distributions we support:
| **Distribution** | **Llama Stack Docker** | Start This Distribution |
|:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:|
| Starter Distribution | [llamastack/distribution-starter](https://hub.docker.com/repository/docker/llamastack/distribution-starter/general) | [Guide](https://llamastack.github.io/latest/distributions/self_hosted_distro/starter.html) |
| Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llamastack.github.io/latest/distributions/self_hosted_distro/meta-reference-gpu.html) |
| PostgreSQL | [llamastack/distribution-postgres-demo](https://hub.docker.com/repository/docker/llamastack/distribution-postgres-demo/general) | |
### Documentation
Please check out our [Documentation](https://llamastack.github.io/latest/index.html) page for more details.
* CLI references
* [llama (server-side) CLI Reference](https://llamastack.github.io/latest/references/llama_cli_reference/index.html): Guide for using the `llama` CLI to work with Llama models (download, study prompts), and building/starting a Llama Stack distribution.
* [llama (client-side) CLI Reference](https://llamastack.github.io/latest/references/llama_stack_client_cli_reference.html): Guide for using the `llama-stack-client` CLI, which allows you to query information about the distribution.
* Getting Started
* [Quick guide to start a Llama Stack server](https://llamastack.github.io/latest/getting_started/index.html).
* [Jupyter notebook](./docs/getting_started.ipynb) walking through how to use the llama_stack_client APIs for simple text and vision inference
* The complete Llama Stack lesson [Colab notebook](https://colab.research.google.com/drive/1dtVmxotBsI4cGZQNsJRYPrLiDeT0Wnwt) from the [Llama 3.2 course on Deeplearning.ai](https://learn.deeplearning.ai/courses/introducing-multimodal-llama-3-2/lesson/8/llama-stack).
* A [Zero-to-Hero Guide](https://github.com/meta-llama/llama-stack/tree/main/docs/zero_to_hero_guide) that guides you through all the key components of Llama Stack with code samples.
* [Contributing](CONTRIBUTING.md)
* [Adding a new API Provider](https://llamastack.github.io/latest/contributing/new_api_provider.html) walks through how to add a new API provider.
### Llama Stack Client SDKs
| **Language** | **Client SDK** | **Package** |
| :----: | :----: | :----: |
| Python | [llama-stack-client-python](https://github.com/meta-llama/llama-stack-client-python) | [![PyPI version](https://img.shields.io/pypi/v/llama_stack_client.svg)](https://pypi.org/project/llama_stack_client/)
| Swift | [llama-stack-client-swift](https://github.com/meta-llama/llama-stack-client-swift) | [![Swift Package Index](https://img.shields.io/endpoint?url=https%3A%2F%2Fswiftpackageindex.com%2Fapi%2Fpackages%2Fmeta-llama%2Fllama-stack-client-swift%2Fbadge%3Ftype%3Dswift-versions)](https://swiftpackageindex.com/meta-llama/llama-stack-client-swift)
| Typescript | [llama-stack-client-typescript](https://github.com/meta-llama/llama-stack-client-typescript) | [![NPM version](https://img.shields.io/npm/v/llama-stack-client.svg)](https://npmjs.org/package/llama-stack-client)
| Kotlin | [llama-stack-client-kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) | [![Maven version](https://img.shields.io/maven-central/v/com.llama.llamastack/llama-stack-client-kotlin)](https://central.sonatype.com/artifact/com.llama.llamastack/llama-stack-client-kotlin)
Check out our client SDKs for connecting to a Llama Stack server in your preferred language; you can choose from [Python](https://github.com/meta-llama/llama-stack-client-python), [TypeScript](https://github.com/meta-llama/llama-stack-client-typescript), [Swift](https://github.com/meta-llama/llama-stack-client-swift), and [Kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) to quickly build your applications.
You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo.
## 🌟 GitHub Star History
[![Star History Chart](https://api.star-history.com/svg?repos=meta-llama/llama-stack&type=Date)](https://www.star-history.com/#meta-llama/llama-stack&Date)
## ✨ Contributors
Thanks to all of our amazing contributors!
<a href="https://github.com/meta-llama/llama-stack/graphs/contributors">
<img src="https://contrib.rocks/image?repo=meta-llama/llama-stack" />
</a>

docs/static/imported-files/usage.json vendored Normal file

@@ -0,0 +1,6 @@
{
  "files": [
    "docs/getting_started/demo_script.py",
    "README.md"
  ]
}