Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-12 12:06:04 +00:00
feat: Adding Demo script and allowing new Website to source files
Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
parent a701f68bd7, commit 255cc90296
8 changed files with 459 additions and 128 deletions
@@ -13,6 +13,19 @@ npm run serve
 ```
 You can open up the docs in your browser at http://localhost:3000

+## File Import System
+
+This documentation uses a custom component to import files directly from the repository, eliminating copy-paste maintenance:
+
+```jsx
+import CodeFromFile from '@site/src/components/CodeFromFile';
+
+<CodeFromFile src="path/to/file.py" />
+<CodeFromFile src="README.md" startLine={1} endLine={20} />
+```
+
+Files are automatically synced from the repo root when building. See the `CodeFromFile` component for syntax highlighting, line ranges, and multi-language support.
+
 ## Content

 Try out Llama Stack's capabilities through our detailed Jupyter notebooks:
@@ -4,65 +4,24 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient
-
-vector_db_id = "my_demo_vector_db"
-client = LlamaStackClient(base_url="http://localhost:8321")
-
-models = client.models.list()
-
-# Select the first LLM and first embedding models
-model_id = next(m for m in models if m.model_type == "llm").identifier
-embedding_model_id = (
-    em := next(m for m in models if m.model_type == "embedding")
-).identifier
-embedding_dimension = em.metadata["embedding_dimension"]
-
-vector_db = client.vector_dbs.register(
-    vector_db_id=vector_db_id,
-    embedding_model=embedding_model_id,
-    embedding_dimension=embedding_dimension,
-    provider_id="faiss",
-)
-vector_db_id = vector_db.identifier
-source = "https://www.paulgraham.com/greatwork.html"
-print("rag_tool> Ingesting document:", source)
-document = RAGDocument(
-    document_id="document_1",
-    content=source,
-    mime_type="text/html",
-    metadata={},
-)
-client.tool_runtime.rag_tool.insert(
-    documents=[document],
-    vector_db_id=vector_db_id,
-    chunk_size_in_tokens=100,
-)
-agent = Agent(
-    client,
-    model=model_id,
-    instructions="You are a helpful assistant",
-    tools=[
-        {
-            "name": "builtin::rag/knowledge_search",
-            "args": {"vector_db_ids": [vector_db_id]},
-        }
-    ],
-)
-
-prompt = "How do you do great work?"
-print("prompt>", prompt)
-
-use_stream = True
-response = agent.create_turn(
-    messages=[{"role": "user", "content": prompt}],
-    session_id=agent.create_session("rag_session"),
-    stream=use_stream,
-)
-
-# Only call `AgentEventLogger().log(response)` for streaming responses.
-if use_stream:
-    for log in AgentEventLogger().log(response):
-        log.print()
-else:
-    print(response)
+import io, requests
+
+from openai import OpenAI
+
+url = "https://www.paulgraham.com/greatwork.html"
+client = OpenAI(base_url="http://localhost:8321/v1/", api_key="none")
+
+vs = client.vector_stores.create()
+response = requests.get(url)
+pseudo_file = io.BytesIO(str(response.content).encode('utf-8'))
+uploaded_file = client.files.create(file=(url, pseudo_file, "text/html"), purpose="assistants")
+client.vector_stores.files.create(vector_store_id=vs.id, file_id=uploaded_file.id)
+
+resp = client.responses.create(
+    model="gpt-4o",
+    input="How do you do great work? Use the existing knowledge_search tool.",
+    tools=[{"type": "file_search", "vector_store_ids": [vs.id]}],
+    include=["file_search_call.results"],
+)
+
+print(resp)
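The rewritten script talks to the server's OpenAI-compatible endpoints. A quick way to sanity-check the ingestion step before querying is to list the files attached to the vector store. This is a sketch, not part of the commit; it assumes a recent `openai` Python client where `vector_stores.files.list` sits alongside the `create` calls used above, and `vs_id` is a placeholder for the id of the store the script creates (`vs.id`).

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/", api_key="none")

# Placeholder: the vector store id created by demo_script.py (vs.id).
vs_id = "vs_..."

# Each attached file should reach status "completed" once indexed.
for f in client.vector_stores.files.list(vector_store_id=vs_id):
    print(f.id, f.status)
```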
@@ -32,76 +32,9 @@ OLLAMA_URL=http://localhost:11434 \
 #### Step 3: Run the demo
 Now open up a new terminal and copy the following script into a file named `demo_script.py`.

-```python title="demo_script.py"
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient
-
-vector_db_id = "my_demo_vector_db"
-client = LlamaStackClient(base_url="http://localhost:8321")
-
-models = client.models.list()
-
-# Select the first LLM and first embedding models
-model_id = next(m for m in models if m.model_type == "llm").identifier
-embedding_model_id = (
-    em := next(m for m in models if m.model_type == "embedding")
-).identifier
-embedding_dimension = em.metadata["embedding_dimension"]
-
-vector_db = client.vector_dbs.register(
-    vector_db_id=vector_db_id,
-    embedding_model=embedding_model_id,
-    embedding_dimension=embedding_dimension,
-    provider_id="faiss",
-)
-vector_db_id = vector_db.identifier
-source = "https://www.paulgraham.com/greatwork.html"
-print("rag_tool> Ingesting document:", source)
-document = RAGDocument(
-    document_id="document_1",
-    content=source,
-    mime_type="text/html",
-    metadata={},
-)
-client.tool_runtime.rag_tool.insert(
-    documents=[document],
-    vector_db_id=vector_db_id,
-    chunk_size_in_tokens=100,
-)
-agent = Agent(
-    client,
-    model=model_id,
-    instructions="You are a helpful assistant",
-    tools=[
-        {
-            "name": "builtin::rag/knowledge_search",
-            "args": {"vector_db_ids": [vector_db_id]},
-        }
-    ],
-)
-
-prompt = "How do you do great work?"
-print("prompt>", prompt)
-
-use_stream = True
-response = agent.create_turn(
-    messages=[{"role": "user", "content": prompt}],
-    session_id=agent.create_session("rag_session"),
-    stream=use_stream,
-)
-
-# Only call `AgentEventLogger().log(response)` for streaming responses.
-if use_stream:
-    for log in AgentEventLogger().log(response):
-        log.print()
-else:
-    print(response)
-```
+import CodeFromFile from '@site/src/components/CodeFromFile';
+
+<CodeFromFile src="demo_script.py" title="demo_script.py" />

 We will use `uv` to run the script
 ```
 uv run --with llama-stack-client,fire,requests demo_script.py
@@ -4,8 +4,8 @@
   "private": true,
   "scripts": {
     "docusaurus": "docusaurus",
-    "start": "docusaurus start",
-    "build": "docusaurus build",
+    "start": "npm run sync-files && docusaurus start",
+    "build": "npm run sync-files && docusaurus build",
     "swizzle": "docusaurus swizzle",
     "deploy": "docusaurus deploy",
     "clear": "docusaurus clear",
@@ -15,7 +15,8 @@
     "gen-api-docs": "docusaurus gen-api-docs",
     "clean-api-docs": "docusaurus clean-api-docs",
     "gen-api-docs:version": "docusaurus gen-api-docs:version",
-    "clean-api-docs:version": "docusaurus clean-api-docs:version"
+    "clean-api-docs:version": "docusaurus clean-api-docs:version",
+    "sync-files": "node scripts/sync-files.js"
   },
   "dependencies": {
     "@docusaurus/core": "3.8.1",
docs/scripts/sync-files.js (new executable file, 93 lines)
@@ -0,0 +1,93 @@
#!/usr/bin/env node

const fs = require('fs');
const path = require('path');

// Repository root is always one level up from docs
const repoRoot = path.join(__dirname, '..', '..');

// Get all requested files from the usage tracking file
function getRequestedFiles() {
  const usageFile = path.join(__dirname, '..', 'static', 'imported-files', 'usage.json');
  if (!fs.existsSync(usageFile)) {
    return [];
  }

  try {
    const usage = JSON.parse(fs.readFileSync(usageFile, 'utf8'));
    return usage.files || [];
  } catch (error) {
    console.warn('Could not read usage file:', error.message);
    return [];
  }
}

// Track file usage
function trackFileUsage(filePath) {
  const usageFile = path.join(__dirname, '..', 'static', 'imported-files', 'usage.json');
  const usageDir = path.dirname(usageFile);

  // Ensure directory exists
  if (!fs.existsSync(usageDir)) {
    fs.mkdirSync(usageDir, { recursive: true });
  }

  let usage = { files: [] };
  if (fs.existsSync(usageFile)) {
    try {
      usage = JSON.parse(fs.readFileSync(usageFile, 'utf8'));
    } catch (error) {
      console.warn('Could not read existing usage file, creating new one');
    }
  }

  if (!usage.files.includes(filePath)) {
    usage.files.push(filePath);
    fs.writeFileSync(usageFile, JSON.stringify(usage, null, 2));
  }
}

// Sync a file from repo root to static directory
function syncFile(filePath) {
  const sourcePath = path.join(repoRoot, filePath);
  const destPath = path.join(__dirname, '..', 'static', 'imported-files', filePath);
  const destDir = path.dirname(destPath);

  // Ensure destination directory exists
  if (!fs.existsSync(destDir)) {
    fs.mkdirSync(destDir, { recursive: true });
  }

  try {
    if (fs.existsSync(sourcePath)) {
      const content = fs.readFileSync(sourcePath, 'utf8');
      fs.writeFileSync(destPath, content);
      console.log(`✅ Synced ${filePath}`);
      trackFileUsage(filePath);
      return true;
    } else {
      console.warn(`⚠️  Source file not found: ${sourcePath}`);
      return false;
    }
  } catch (error) {
    console.error(`❌ Error syncing ${filePath}:`, error.message);
    return false;
  }
}

// Main execution
console.log(`📁 Repository root: ${path.resolve(repoRoot)}`);

// Get files that are being requested by the documentation
const requestedFiles = getRequestedFiles();
console.log(`📄 Syncing ${requestedFiles.length} requested files...`);

if (requestedFiles.length === 0) {
  console.log('ℹ️  No files requested yet. Files will be synced when first referenced in documentation.');
} else {
  requestedFiles.forEach(filePath => {
    syncFile(filePath);
  });
}

console.log('✅ File sync complete!');
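`usage.json` is the contract between `CodeFromFile` and this script: the component records which files the docs need, and the script copies those files into `static/imported-files/` before a build. A minimal pre-build check, sketched under the assumption that it runs from the `docs/` directory (it is not part of this commit):

```python
import json
from pathlib import Path

docs_dir = Path.cwd()        # assumption: run from docs/
repo_root = docs_dir.parent  # mirrors sync-files.js: one level up from docs
usage_path = docs_dir / "static" / "imported-files" / "usage.json"

# Every tracked file should exist under the repo root, or the sync
# step above will warn and skip it.
files = json.loads(usage_path.read_text()).get("files", []) if usage_path.exists() else []
for rel in files:
    status = "ok" if (repo_root / rel).exists() else "MISSING"
    print(status, rel)
```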
docs/src/components/CodeFromFile.jsx (new file, 119 lines)
@@ -0,0 +1,119 @@
import React, { useState, useEffect } from 'react';
import CodeBlock from '@theme/CodeBlock';

export default function CodeFromFile({
  src,
  language,
  title,
  startLine,
  endLine,
  highlightLines
}) {
  const [content, setContent] = useState('');
  const [error, setError] = useState(null);

  useEffect(() => {
    async function loadFile() {
      try {
        // Register this file for syncing (build-time only)
        if (typeof window === 'undefined') {
          // This runs during build - register the file
          const fs = require('fs');
          const path = require('path');

          const usageFile = path.join(process.cwd(), 'static', 'imported-files', 'usage.json');
          const usageDir = path.dirname(usageFile);

          if (!fs.existsSync(usageDir)) {
            fs.mkdirSync(usageDir, { recursive: true });
          }

          let usage = { files: [] };
          if (fs.existsSync(usageFile)) {
            try {
              usage = JSON.parse(fs.readFileSync(usageFile, 'utf8'));
            } catch (error) {
              console.warn('Could not read existing usage file');
            }
          }

          if (!usage.files.includes(src)) {
            usage.files.push(src);
            fs.writeFileSync(usageFile, JSON.stringify(usage, null, 2));
          }
        }

        // Load file from static/imported-files directory
        const response = await fetch(`/imported-files/${src}`);
        if (!response.ok) {
          throw new Error(`Failed to fetch: ${response.status}`);
        }
        let text = await response.text();

        // Handle line range if specified
        if (startLine || endLine) {
          const lines = text.split('\n');
          const start = startLine ? Math.max(0, startLine - 1) : 0;
          const end = endLine ? Math.min(lines.length, endLine) : lines.length;
          text = lines.slice(start, end).join('\n');
        }

        setContent(text);
      } catch (err) {
        console.error('Failed to load file:', err);
        setError(`Failed to load ${src}: ${err.message}`);
      }
    }

    loadFile();
  }, [src, startLine, endLine]);

  if (error) {
    return <div style={{ color: 'red', padding: '1rem', border: '1px solid red', borderRadius: '4px' }}>
      Error: {error}
    </div>;
  }

  if (!content) {
    return <div>Loading {src}...</div>;
  }

  // Auto-detect language from file extension if not provided
  const detectedLanguage = language || getLanguageFromExtension(src);

  return (
    <CodeBlock
      language={detectedLanguage}
      title={title || src}
      metastring={highlightLines ? `{${highlightLines}}` : undefined}
    >
      {content}
    </CodeBlock>
  );
}

function getLanguageFromExtension(filename) {
  const ext = filename.split('.').pop();
  const languageMap = {
    'py': 'python',
    'js': 'javascript',
    'jsx': 'jsx',
    'ts': 'typescript',
    'tsx': 'tsx',
    'md': 'markdown',
    'sh': 'bash',
    'yaml': 'yaml',
    'yml': 'yaml',
    'json': 'json',
    'css': 'css',
    'html': 'html',
    'cpp': 'cpp',
    'c': 'c',
    'java': 'java',
    'go': 'go',
    'rs': 'rust',
    'php': 'php',
    'rb': 'ruby',
  };
  return languageMap[ext] || 'text';
}
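Because the component fetches `/imported-files/${src}` at render time, the synced copies are plain static assets. A hedged smoke test after `npm run sync-files` and `npm start`; it assumes the default dev port from the docs README above, and the file list matches `usage.json` below:

```python
import requests

# CodeFromFile resolves src -> /imported-files/<src> on the docs server.
for src in ["README.md", "docs/getting_started/demo_script.py"]:
    resp = requests.get(f"http://localhost:3000/imported-files/{src}")
    print(resp.status_code, src)
```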
docs/static/imported-files/README.md (new vendored file, 207 lines)
@@ -0,0 +1,207 @@
# Llama Stack

[PyPI version](https://pypi.org/project/llama_stack/)
[PyPI downloads](https://pypi.org/project/llama-stack/)
[License](https://github.com/meta-llama/llama-stack/blob/main/LICENSE)
[Discord](https://discord.gg/llama-stack)
[Unit Tests](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
[Integration Tests](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)

[**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)

### ✨🎉 Llama 4 Support 🎉✨

We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta.

<details>
<summary>👋 Click here to see how to run Llama 4 models on Llama Stack</summary>

*Note: you need an 8xH100 GPU host to run these models.*

```bash
pip install -U llama_stack

MODEL="Llama-4-Scout-17B-16E-Instruct"
# get meta url from llama.com
huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL

# start a llama stack server
INFERENCE_MODEL=meta-llama/$MODEL llama stack build --run --template meta-reference-gpu

# install client to interact with the server
pip install llama-stack-client
```

### CLI

```bash
# Run a chat completion
MODEL="Llama-4-Scout-17B-16E-Instruct"

llama-stack-client --endpoint http://localhost:8321 \
  inference chat-completion \
  --model-id meta-llama/$MODEL \
  --message "write a haiku for meta's llama 4 models"

OpenAIChatCompletion(
    ...
    choices=[
        OpenAIChatCompletionChoice(
            finish_reason='stop',
            index=0,
            message=OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam(
                role='assistant',
                content='...**Silent minds awaken,** \n**Whispers of billions of words,** \n**Reasoning breaks the night.** \n\n— \n*This haiku blends the essence of LLaMA 4\'s capabilities with nature-inspired metaphor, evoking its vast training data and transformative potential.*',
                ...
            ),
            ...
        )
    ],
    ...
)
```

### Python SDK

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
prompt = "Write a haiku about coding"

print(f"User> {prompt}")
response = client.chat.completions.create(
    model=model_id,
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ],
)
print(f"Assistant> {response.choices[0].message.content}")
```
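A streaming variant of the same call; this is a sketch assuming the client mirrors OpenAI's streaming interface (`stream=True` yielding chunks with incremental `choices[0].delta` content), which the README above does not show explicitly:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# Assumption: OpenAI-style streaming chunks with incremental deltas.
stream = client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[{"role": "user", "content": "Write a haiku about coding"}],
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta
    if delta.content:
        print(delta.content, end="", flush=True)
print()
```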
As more providers start supporting Llama 4, you can use them in Llama Stack as well. We are adding to the list. Stay tuned!

</details>

### 🚀 One-Line Installer 🚀

To try Llama Stack locally, run:

```bash
curl -LsSf https://github.com/meta-llama/llama-stack/raw/main/scripts/install.sh | bash
```

### Overview

Llama Stack standardizes the core building blocks that simplify AI application development. It codifies best practices across the Llama ecosystem. More specifically, it provides:

- **Unified API layer** for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry.
- **Plugin architecture** to support the rich ecosystem of different API implementations in various environments, including local development, on-premises, cloud, and mobile.
- **Prepackaged verified distributions** which offer a one-stop solution for developers to get started quickly and reliably in any environment.
- **Multiple developer interfaces** like CLI and SDKs for Python, Typescript, iOS, and Android.
- **Standalone applications** as examples for how to build production-grade AI applications with Llama Stack.

<div style="text-align: center;">
  <img
    src="https://github.com/user-attachments/assets/33d9576d-95ea-468d-95e2-8fa233205a50"
    width="480"
    title="Llama Stack"
    alt="Llama Stack"
  />
</div>

### Llama Stack Benefits

- **Flexible Options**: Developers can choose their preferred infrastructure without changing APIs and enjoy flexible deployment choices.
- **Consistent Experience**: With its unified APIs, Llama Stack makes it easier to build, test, and deploy AI applications with consistent application behavior.
- **Robust Ecosystem**: Llama Stack is already integrated with distribution partners (cloud providers, hardware vendors, and AI-focused companies) that offer tailored infrastructure, software, and services for deploying Llama models.

By reducing friction and complexity, Llama Stack empowers developers to focus on what they do best: building transformative generative AI applications.

### API Providers

Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack. Please check out the [full list of providers](https://llamastack.github.io/docs/providers).

| API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Telemetry | Post Training | Eval | DatasetIO |
|:--------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:---------:|
| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| SambaNova | Hosted | | ✅ | | ✅ | | | | |
| Cerebras | Hosted | | ✅ | | | | | | |
| Fireworks | Hosted | ✅ | ✅ | ✅ | | | | | |
| AWS Bedrock | Hosted | | ✅ | | ✅ | | | | |
| Together | Hosted | ✅ | ✅ | | ✅ | | | | |
| Groq | Hosted | | ✅ | | | | | | |
| Ollama | Single Node | | ✅ | | | | | | |
| TGI | Hosted/Single Node | | ✅ | | | | | | |
| NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | | |
| ChromaDB | Hosted/Single Node | | | ✅ | | | | | |
| Milvus | Hosted/Single Node | | | ✅ | | | | | |
| Qdrant | Hosted/Single Node | | | ✅ | | | | | |
| Weaviate | Hosted/Single Node | | | ✅ | | | | | |
| SQLite-vec | Single Node | | | ✅ | | | | | |
| PG Vector | Single Node | | | ✅ | | | | | |
| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | | |
| vLLM | Single Node | | ✅ | | | | | | |
| OpenAI | Hosted | | ✅ | | | | | | |
| Anthropic | Hosted | | ✅ | | | | | | |
| Gemini | Hosted | | ✅ | | | | | | |
| WatsonX | Hosted | | ✅ | | | | | | |
| HuggingFace | Single Node | | | | | | ✅ | | ✅ |
| TorchTune | Single Node | | | | | | ✅ | | |
| NVIDIA NEMO | Hosted | | ✅ | ✅ | | | ✅ | ✅ | ✅ |
| NVIDIA | Hosted | | | | | | ✅ | ✅ | ✅ |

> **Note**: Additional providers are available through external packages. See [External Providers](https://llamastack.github.io/docs/providers/external) documentation.

### Distributions

A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (e.g., Ollama) and seamlessly transition to production (e.g., Fireworks) without changing your application code.
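In practice, that "without changing your application code" claim reduces to pointing the client at a different endpoint. An illustrative sketch, not from the README itself; the `LLAMA_STACK_URL` environment variable is hypothetical:

```python
import os

from llama_stack_client import LlamaStackClient

# Hypothetical switch: a local ollama-backed distro vs. a hosted production
# distro. Only the endpoint changes; the calls below stay the same.
base_url = os.environ.get("LLAMA_STACK_URL", "http://localhost:8321")
client = LlamaStackClient(base_url=base_url)

response = client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.choices[0].message.content)
```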
Here are some of the distributions we support:

| **Distribution** | **Llama Stack Docker** | Start This Distribution |
|:----------------:|:----------------------:|:-----------------------:|
| Starter Distribution | [llamastack/distribution-starter](https://hub.docker.com/repository/docker/llamastack/distribution-starter/general) | [Guide](https://llamastack.github.io/latest/distributions/self_hosted_distro/starter.html) |
| Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llamastack.github.io/latest/distributions/self_hosted_distro/meta-reference-gpu.html) |
| PostgreSQL | [llamastack/distribution-postgres-demo](https://hub.docker.com/repository/docker/llamastack/distribution-postgres-demo/general) | |

### Documentation

Please check out our [Documentation](https://llamastack.github.io/latest/index.html) page for more details.

* CLI references
  * [llama (server-side) CLI Reference](https://llamastack.github.io/latest/references/llama_cli_reference/index.html): Guide for using the `llama` CLI to work with Llama models (download, study prompts), and building/starting a Llama Stack distribution.
  * [llama (client-side) CLI Reference](https://llamastack.github.io/latest/references/llama_stack_client_cli_reference.html): Guide for using the `llama-stack-client` CLI, which allows you to query information about the distribution.
* Getting Started
  * [Quick guide to start a Llama Stack server](https://llamastack.github.io/latest/getting_started/index.html).
  * [Jupyter notebook](./docs/getting_started.ipynb) that walks through how to use simple text and vision inference with the llama_stack_client APIs.
  * The complete Llama Stack lesson [Colab notebook](https://colab.research.google.com/drive/1dtVmxotBsI4cGZQNsJRYPrLiDeT0Wnwt) of the new [Llama 3.2 course on Deeplearning.ai](https://learn.deeplearning.ai/courses/introducing-multimodal-llama-3-2/lesson/8/llama-stack).
  * A [Zero-to-Hero Guide](https://github.com/meta-llama/llama-stack/tree/main/docs/zero_to_hero_guide) that guides you through all the key components of Llama Stack with code samples.
* [Contributing](CONTRIBUTING.md)
  * [Adding a new API Provider](https://llamastack.github.io/latest/contributing/new_api_provider.html): a walkthrough of how to add a new API provider.

### Llama Stack Client SDKs

| **Language** | **Client SDK** | **Package** |
| :----: | :----: | :----: |
| Python | [llama-stack-client-python](https://github.com/meta-llama/llama-stack-client-python) | [PyPI](https://pypi.org/project/llama_stack_client/) |
| Swift | [llama-stack-client-swift](https://github.com/meta-llama/llama-stack-client-swift) | [Swift Package Index](https://swiftpackageindex.com/meta-llama/llama-stack-client-swift) |
| Typescript | [llama-stack-client-typescript](https://github.com/meta-llama/llama-stack-client-typescript) | [npm](https://npmjs.org/package/llama-stack-client) |
| Kotlin | [llama-stack-client-kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) | [Maven Central](https://central.sonatype.com/artifact/com.llama.llamastack/llama-stack-client-kotlin) |

Check out our client SDKs for connecting to a Llama Stack server in your preferred language: you can choose from [python](https://github.com/meta-llama/llama-stack-client-python), [typescript](https://github.com/meta-llama/llama-stack-client-typescript), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) to quickly build your applications.

You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo.

## 🌟 GitHub Star History

[Star History Chart](https://www.star-history.com/#meta-llama/llama-stack&Date)

## ✨ Contributors

Thanks to all of our amazing contributors!

<a href="https://github.com/meta-llama/llama-stack/graphs/contributors">
  <img src="https://contrib.rocks/image?repo=meta-llama/llama-stack" />
</a>

docs/static/imported-files/usage.json (new vendored file, 6 lines)
@@ -0,0 +1,6 @@
{
  "files": [
    "docs/getting_started/demo_script.py",
    "README.md"
  ]
}