Mirror of https://github.com/meta-llama/llama-stack.git

removing static files and updating script

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>

parent 255cc90296
commit 5b41e0d65c

7 changed files with 58 additions and 243 deletions
.gitignore (vendored): 1 change

```diff
@@ -31,3 +31,4 @@ CLAUDE.md
 .claude/
 docs/.docusaurus/
 docs/node_modules/
+docs/static/imported-files/
```
````diff
@@ -34,7 +34,7 @@ Now open up a new terminal and copy the following script into a file named `demo
 import CodeFromFile from '@site/src/components/CodeFromFile';

-<CodeFromFile src="demo_script.py" title="demo_script.py" />
+<CodeFromFile src="docs/docs/getting_started/demo_script.py" title="demo_script.py" />

 We will use `uv` to run the script
 ```
 uv run --with llama-stack-client,fire,requests demo_script.py
````
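The demo script itself is pulled in via `CodeFromFile` rather than inlined, so its contents do not appear in this diff. For orientation, here is a minimal, hypothetical sketch of the kind of script the `uv run` command above executes. It is not the actual `docs/docs/getting_started/demo_script.py`; it assumes a Llama Stack server is already listening on `http://localhost:8321` and that a model id is supplied on the command line, and it reuses only the client calls shown in the README later in this diff.

```python
# Hypothetical stand-in for the quickstart demo script; NOT the real
# docs/docs/getting_started/demo_script.py (its contents are not shown in this diff).
# Assumes a Llama Stack server is already running at http://localhost:8321.
import fire
from llama_stack_client import LlamaStackClient


def main(model_id: str, host: str = "localhost", port: int = 8321):
    client = LlamaStackClient(base_url=f"http://{host}:{port}")
    response = client.chat.completions.create(
        model=model_id,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Write a haiku about coding"},
        ],
    )
    print(response.choices[0].message.content)


if __name__ == "__main__":
    # e.g. uv run --with llama-stack-client,fire,requests demo_script.py --model_id <your-model>
    fire.Fire(main)
```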
```diff
@@ -222,6 +222,7 @@ const config: Config = {
   } satisfies Preset.ThemeConfig,

   plugins: [
+    require.resolve('./plugins/file-sync-plugin'),
     [
       "docusaurus-plugin-openapi-docs",
       {
```
```diff
@@ -47,6 +47,57 @@ function trackFileUsage(filePath) {
   }
 }

+// Filter content based on file type and options
+function filterContent(content, filePath) {
+  let lines = content.split('\n');
+
+  // Skip copyright header for Python files
+  if (filePath.endsWith('.py')) {
+    // Read the license header file
+    const licenseHeaderPath = path.join(repoRoot, 'docs', 'license_header.txt');
+    if (fs.existsSync(licenseHeaderPath)) {
+      try {
+        const licenseText = fs.readFileSync(licenseHeaderPath, 'utf8');
+        const licenseLines = licenseText.trim().split('\n');
+
+        // Check if file starts with the license header (accounting for # comments)
+        if (lines.length >= licenseLines.length) {
+          let matches = true;
+          for (let i = 0; i < licenseLines.length; i++) {
+            const codeLine = lines[i]?.replace(/^#\s*/, '').trim();
+            const licenseLine = licenseLines[i]?.trim();
+            if (codeLine !== licenseLine) {
+              matches = false;
+              break;
+            }
+          }
+
+          if (matches) {
+            // Skip the license header and any trailing empty lines
+            let skipTo = licenseLines.length;
+            while (skipTo < lines.length && lines[skipTo].trim() === '') {
+              skipTo++;
+            }
+            lines = lines.slice(skipTo);
+          }
+        }
+      } catch (error) {
+        console.warn(`Could not read license header, skipping filtering for ${filePath}`);
+      }
+    }
+  }
+
+  // Trim empty lines from start and end
+  while (lines.length > 0 && lines[0].trim() === '') {
+    lines.shift();
+  }
+  while (lines.length > 0 && lines[lines.length - 1].trim() === '') {
+    lines.pop();
+  }
+
+  return lines.join('\n');
+}
+
 // Sync a file from repo root to static directory
 function syncFile(filePath) {
   const sourcePath = path.join(repoRoot, filePath);
```
```diff
@@ -61,7 +112,8 @@ function syncFile(filePath) {
   try {
     if (fs.existsSync(sourcePath)) {
       const content = fs.readFileSync(sourcePath, 'utf8');
-      fs.writeFileSync(destPath, content);
+      const filteredContent = filterContent(content, filePath);
+      fs.writeFileSync(destPath, filteredContent);
       console.log(`✅ Synced ${filePath}`);
       trackFileUsage(filePath);
       return true;
```
```diff
@@ -15,33 +15,7 @@ export default function CodeFromFile({
   useEffect(() => {
     async function loadFile() {
       try {
-        // Register this file for syncing (build-time only)
-        if (typeof window === 'undefined') {
-          // This runs during build - register the file
-          const fs = require('fs');
-          const path = require('path');
-
-          const usageFile = path.join(process.cwd(), 'static', 'imported-files', 'usage.json');
-          const usageDir = path.dirname(usageFile);
-
-          if (!fs.existsSync(usageDir)) {
-            fs.mkdirSync(usageDir, { recursive: true });
-          }
-
-          let usage = { files: [] };
-          if (fs.existsSync(usageFile)) {
-            try {
-              usage = JSON.parse(fs.readFileSync(usageFile, 'utf8'));
-            } catch (error) {
-              console.warn('Could not read existing usage file');
-            }
-          }
-
-          if (!usage.files.includes(src)) {
-            usage.files.push(src);
-            fs.writeFileSync(usageFile, JSON.stringify(usage, null, 2));
-          }
-        }
+        // File registration is now handled by the file-sync-plugin during build

         // Load file from static/imported-files directory
         const response = await fetch(`/imported-files/${src}`);
```
```diff
@@ -50,7 +24,7 @@
       }
       let text = await response.text();

-      // Handle line range if specified
+      // Handle line range if specified (filtering is done at build time)
       if (startLine || endLine) {
         const lines = text.split('\n');
         const start = startLine ? Math.max(0, startLine - 1) : 0;
```
docs/static/imported-files/README.md (vendored): 207 deletions (entire file removed; its former contents follow)
# Llama Stack

[PyPI version](https://pypi.org/project/llama_stack/)
[PyPI downloads](https://pypi.org/project/llama-stack/)
[License](https://github.com/meta-llama/llama-stack/blob/main/LICENSE)
[Discord](https://discord.gg/llama-stack)
[Unit Tests](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
[Integration Tests](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)

[**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)

### ✨🎉 Llama 4 Support 🎉✨

We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta.

<details>

<summary>👋 Click here to see how to run Llama 4 models on Llama Stack</summary>

*Note: you need an 8xH100 GPU host to run these models.*

```bash
pip install -U llama_stack

MODEL="Llama-4-Scout-17B-16E-Instruct"
# get meta url from llama.com
huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL

# start a llama stack server
INFERENCE_MODEL=meta-llama/$MODEL llama stack build --run --template meta-reference-gpu

# install client to interact with the server
pip install llama-stack-client
```

### CLI
```bash
# Run a chat completion
MODEL="Llama-4-Scout-17B-16E-Instruct"

llama-stack-client --endpoint http://localhost:8321 \
  inference chat-completion \
  --model-id meta-llama/$MODEL \
  --message "write a haiku for meta's llama 4 models"

OpenAIChatCompletion(
    ...
    choices=[
        OpenAIChatCompletionChoice(
            finish_reason='stop',
            index=0,
            message=OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam(
                role='assistant',
                content='...**Silent minds awaken,** \n**Whispers of billions of words,** \n**Reasoning breaks the night.** \n\n— \n*This haiku blends the essence of LLaMA 4\'s capabilities with nature-inspired metaphor, evoking its vast training data and transformative potential.*',
                ...
            ),
            ...
        )
    ],
    ...
)
```
### Python SDK
```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
prompt = "Write a haiku about coding"

print(f"User> {prompt}")
response = client.chat.completions.create(
    model=model_id,
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ],
)
print(f"Assistant> {response.choices[0].message.content}")
```
As more providers start supporting Llama 4, you can use them in Llama Stack as well. We are adding to the list. Stay tuned!

</details>

### 🚀 One-Line Installer 🚀

To try Llama Stack locally, run:

```bash
curl -LsSf https://github.com/meta-llama/llama-stack/raw/main/scripts/install.sh | bash
```

### Overview

Llama Stack standardizes the core building blocks that simplify AI application development. It codifies best practices across the Llama ecosystem. More specifically, it provides:

- **Unified API layer** for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry.
- **Plugin architecture** to support the rich ecosystem of different API implementations in various environments, including local development, on-premises, cloud, and mobile.
- **Prepackaged verified distributions** which offer a one-stop solution for developers to get started quickly and reliably in any environment.
- **Multiple developer interfaces** like CLI and SDKs for Python, Typescript, iOS, and Android.
- **Standalone applications** as examples for how to build production-grade AI applications with Llama Stack.

<div style="text-align: center;">
  <img
    src="https://github.com/user-attachments/assets/33d9576d-95ea-468d-95e2-8fa233205a50"
    width="480"
    title="Llama Stack"
    alt="Llama Stack"
  />
</div>

### Llama Stack Benefits
- **Flexible Options**: Developers can choose their preferred infrastructure without changing APIs and enjoy flexible deployment choices.
- **Consistent Experience**: With its unified APIs, Llama Stack makes it easier to build, test, and deploy AI applications with consistent application behavior.
- **Robust Ecosystem**: Llama Stack is already integrated with distribution partners (cloud providers, hardware vendors, and AI-focused companies) that offer tailored infrastructure, software, and services for deploying Llama models.

By reducing friction and complexity, Llama Stack empowers developers to focus on what they do best: building transformative generative AI applications.

### API Providers
Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack.
Please check out the [full list](https://llamastack.github.io/docs/providers).

| API Provider Builder | Environments | Agents | Inference | VectorIO | Safety | Telemetry | Post Training | Eval | DatasetIO |
|:--------------------:|:------------:|:------:|:---------:|:--------:|:------:|:---------:|:-------------:|:----:|:---------:|
| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| SambaNova | Hosted | | ✅ | | ✅ | | | | |
| Cerebras | Hosted | | ✅ | | | | | | |
| Fireworks | Hosted | ✅ | ✅ | ✅ | | | | | |
| AWS Bedrock | Hosted | | ✅ | | ✅ | | | | |
| Together | Hosted | ✅ | ✅ | | ✅ | | | | |
| Groq | Hosted | | ✅ | | | | | | |
| Ollama | Single Node | | ✅ | | | | | | |
| TGI | Hosted/Single Node | | ✅ | | | | | | |
| NVIDIA NIM | Hosted/Single Node | | ✅ | | ✅ | | | | |
| ChromaDB | Hosted/Single Node | | | ✅ | | | | | |
| Milvus | Hosted/Single Node | | | ✅ | | | | | |
| Qdrant | Hosted/Single Node | | | ✅ | | | | | |
| Weaviate | Hosted/Single Node | | | ✅ | | | | | |
| SQLite-vec | Single Node | | | ✅ | | | | | |
| PG Vector | Single Node | | | ✅ | | | | | |
| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | | | |
| vLLM | Single Node | | ✅ | | | | | | |
| OpenAI | Hosted | | ✅ | | | | | | |
| Anthropic | Hosted | | ✅ | | | | | | |
| Gemini | Hosted | | ✅ | | | | | | |
| WatsonX | Hosted | | ✅ | | | | | | |
| HuggingFace | Single Node | | | | | | ✅ | | ✅ |
| TorchTune | Single Node | | | | | | ✅ | | |
| NVIDIA NEMO | Hosted | | ✅ | ✅ | | | ✅ | ✅ | ✅ |
| NVIDIA | Hosted | | | | | | ✅ | ✅ | ✅ |

> **Note**: Additional providers are available through external packages. See [External Providers](https://llamastack.github.io/docs/providers/external) documentation.

### Distributions

A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (e.g., Ollama) and seamlessly transition to production (e.g., Fireworks) without changing your application code, as shown in the sketch after the table below.
Here are some of the distributions we support:

| **Distribution** | **Llama Stack Docker** | Start This Distribution |
|:----------------:|:----------------------:|:-----------------------:|
| Starter Distribution | [llamastack/distribution-starter](https://hub.docker.com/repository/docker/llamastack/distribution-starter/general) | [Guide](https://llamastack.github.io/latest/distributions/self_hosted_distro/starter.html) |
| Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llamastack.github.io/latest/distributions/self_hosted_distro/meta-reference-gpu.html) |
| PostgreSQL | [llamastack/distribution-postgres-demo](https://hub.docker.com/repository/docker/llamastack/distribution-postgres-demo/general) | |

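To make the "without changing your application code" claim concrete, here is a hedged Python sketch that reuses the SDK calls from the Python SDK example above: the application logic stays identical across distributions, and only the endpoint the client points at changes. The endpoint values and the environment variable name below are illustrative placeholders, not addresses published by any particular distribution.

```python
import os

from llama_stack_client import LlamaStackClient

# Only the endpoint differs between a local dev distro and a hosted one;
# both the env var name and the URL below are illustrative placeholders.
base_url = os.environ.get("LLAMA_STACK_ENDPOINT", "http://localhost:8321")

client = LlamaStackClient(base_url=base_url)
response = client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[{"role": "user", "content": "Write a haiku about coding"}],
)
print(response.choices[0].message.content)
```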
### Documentation

Please check out our [Documentation](https://llamastack.github.io/latest/index.html) page for more details.

* CLI references
  * [llama (server-side) CLI Reference](https://llamastack.github.io/latest/references/llama_cli_reference/index.html): Guide for using the `llama` CLI to work with Llama models (download, study prompts), and building/starting a Llama Stack distribution.
  * [llama (client-side) CLI Reference](https://llamastack.github.io/latest/references/llama_stack_client_cli_reference.html): Guide for using the `llama-stack-client` CLI, which allows you to query information about the distribution.
* Getting Started
  * [Quick guide to start a Llama Stack server](https://llamastack.github.io/latest/getting_started/index.html).
  * [Jupyter notebook](./docs/getting_started.ipynb) that walks through how to use simple text and vision inference with the llama_stack_client APIs.
  * The complete Llama Stack lesson [Colab notebook](https://colab.research.google.com/drive/1dtVmxotBsI4cGZQNsJRYPrLiDeT0Wnwt) of the new [Llama 3.2 course on Deeplearning.ai](https://learn.deeplearning.ai/courses/introducing-multimodal-llama-3-2/lesson/8/llama-stack).
  * A [Zero-to-Hero Guide](https://github.com/meta-llama/llama-stack/tree/main/docs/zero_to_hero_guide) that guides you through all the key components of Llama Stack with code samples.
* [Contributing](CONTRIBUTING.md)
  * [Adding a new API Provider](https://llamastack.github.io/latest/contributing/new_api_provider.html): a walkthrough of how to add a new API provider.

### Llama Stack Client SDKs

| **Language** | **Client SDK** | **Package** |
| :----: | :----: | :----: |
| Python | [llama-stack-client-python](https://github.com/meta-llama/llama-stack-client-python) | [PyPI](https://pypi.org/project/llama_stack_client/) |
| Swift | [llama-stack-client-swift](https://github.com/meta-llama/llama-stack-client-swift) | [Swift Package Index](https://swiftpackageindex.com/meta-llama/llama-stack-client-swift) |
| Typescript | [llama-stack-client-typescript](https://github.com/meta-llama/llama-stack-client-typescript) | [npm](https://npmjs.org/package/llama-stack-client) |
| Kotlin | [llama-stack-client-kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) | [Maven Central](https://central.sonatype.com/artifact/com.llama.llamastack/llama-stack-client-kotlin) |

Check out our client SDKs for connecting to a Llama Stack server in your preferred language; you can choose from the [python](https://github.com/meta-llama/llama-stack-client-python), [typescript](https://github.com/meta-llama/llama-stack-client-typescript), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) clients to quickly build your applications.

You can find more example scripts that use the client SDKs to talk to a Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo.

## 🌟 GitHub Star History

[Star History Chart](https://www.star-history.com/#meta-llama/llama-stack&Date)

## ✨ Contributors

Thanks to all of our amazing contributors!

<a href="https://github.com/meta-llama/llama-stack/graphs/contributors">
  <img src="https://contrib.rocks/image?repo=meta-llama/llama-stack" />
</a>
docs/static/imported-files/usage.json (vendored): 6 deletions (entire file removed)

```diff
@@ -1,6 +0,0 @@
-{
-  "files": [
-    "docs/getting_started/demo_script.py",
-    "README.md"
-  ]
-}
```