RHAIENG-565: purge the midstream repo content so that it hosts only the build artifacts; only the redhat-distribution should remain
@@ -1,6 +0,0 @@
[run]
omit =
    */tests/*
    */llama_stack/providers/*
    */llama_stack/templates/*
    .venv/*

5  .github/CODEOWNERS  (vendored)
@@ -1,5 +0,0 @@
# Each line is a file pattern followed by one or more owners.

# These owners will be the default owners for everything in
# the repo. Unless a later match takes precedence,
* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @terrytangyuan @leseb @bbrowning @reluctantfuturist

77  .github/ISSUE_TEMPLATE/bug.yml  (vendored)
@@ -1,77 +0,0 @@
name: 🐛 Bug Report
description: Create a report to help us reproduce and fix the bug
labels: ["bug"]
body:
  - type: markdown
    attributes:
      value: >
        #### Before submitting a bug, please make sure the issue hasn't been already addressed by searching through [the
        existing and past issues](https://github.com/meta-llama/llama-stack/issues).

  - type: textarea
    id: system-info
    attributes:
      label: System Info
      description: |
        Please share your system info with us. You can use the following command to capture your environment information
        python -m "torch.utils.collect_env"

      placeholder: |
        PyTorch version, CUDA version, GPU type, #num of GPUs...
    validations:
      required: true

  - type: checkboxes
    id: information-scripts-examples
    attributes:
      label: Information
      description: 'The problem arises when using:'
      options:
        - label: "The official example scripts"
        - label: "My own modified scripts"

  - type: textarea
    id: bug-description
    attributes:
      label: 🐛 Describe the bug
      description: |
        Please provide a clear and concise description of what the bug is.

        Please also paste or describe the results you observe instead of the expected results.
      placeholder: |
        A clear and concise description of what the bug is.

        ```llama stack
        # Command that you used for running the examples
        ```
        Description of the results
    validations:
      required: true

  - type: textarea
    attributes:
      label: Error logs
      description: |
        If you observe an error, please paste the error message including the **full** traceback of the exception. It may be relevant to wrap error messages in ```` ```triple quotes blocks``` ````.

      placeholder: |
        ```
        The error message you got, with the full traceback.
        ```

    validations:
      required: true


  - type: textarea
    id: expected-behavior
    validations:
      required: true
    attributes:
      label: Expected behavior
      description: "A clear and concise description of what you would expect to happen."

  - type: markdown
    attributes:
      value: >
        Thanks for contributing 🎉!

12  .github/ISSUE_TEMPLATE/config.yml  (vendored)
@@ -1,12 +0,0 @@
blank_issues_enabled: false

contact_links:
  - name: Have you read the docs?
    url: https://llama-stack.readthedocs.io/en/latest/index.html
    about: Much help can be found in the docs
  - name: Start a discussion
    url: https://github.com/meta-llama/llama-stack/discussions/new
    about: Start a discussion on a topic
  - name: Chat on Discord
    url: https://discord.gg/llama-stack
    about: Maybe chatting with the community can help

28  .github/ISSUE_TEMPLATE/feature-request.yml  (vendored)
@@ -1,28 +0,0 @@
name: 🚀 Feature request
description: Request a new llama-stack feature
labels: ["enhancement"]
body:
  - type: textarea
    id: feature-pitch
    attributes:
      label: 🚀 Describe the new functionality needed
      description: >
        A clear and concise description of _what_ needs to be built.
    validations:
      required: true

  - type: textarea
    id: feature-motivation
    attributes:
      label: 💡 Why is this needed? What if we don't build it?
      description: >
        A clear and concise description of _why_ this functionality is needed.
    validations:
      required: true

  - type: textarea
    id: other-thoughts
    attributes:
      label: Other thoughts
      description: >
        Any thoughts about how this may result in complexity in the codebase, or other trade-offs.

8  .github/PULL_REQUEST_TEMPLATE.md  (vendored)
@@ -1,8 +0,0 @@
# What does this PR do?
<!-- Provide a short summary of what this PR does and why. Link to relevant issues if applicable. -->

<!-- If resolving an issue, uncomment and update the line below -->
<!-- Closes #[issue-number] -->

## Test Plan
<!-- Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.* -->

2  .github/TRIAGERS.md  (vendored)
@@ -1,2 +0,0 @@
# This file documents Triage members in the Llama Stack community
@bbrowning @booxter @franciscojavierarceo @leseb

9  .github/actions/setup-ollama/action.yml  (vendored)
@@ -1,9 +0,0 @@
name: Setup Ollama
description: Start Ollama
runs:
  using: "composite"
  steps:
    - name: Start Ollama
      shell: bash
      run: |
        docker run -d --name ollama -p 11434:11434 docker.io/leseb/ollama-with-models

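For context, the workflows later in this diff consumed this composite action through a local `uses:` reference; a minimal sketch of the calling step (the surrounding job and checkout step are assumed here, they are not part of the deleted action itself):

    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      - name: Setup ollama
        uses: ./.github/actions/setup-ollama
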
27  .github/actions/setup-runner/action.yml  (vendored)
@@ -1,27 +0,0 @@
name: Setup runner
description: Prepare a runner for the tests (install uv, python, project dependencies, etc.)
inputs:
  python-version:
    description: The Python version to use
    required: false
    default: "3.10"
runs:
  using: "composite"
  steps:
    - name: Install uv
      uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
      with:
        python-version: ${{ inputs.python-version }}
        activate-environment: true
        version: 0.7.6

    - name: Install dependencies
      shell: bash
      run: |
        uv sync --all-groups
        uv pip install ollama faiss-cpu
        # always test against the latest version of the client
        # TODO: this is not necessarily a good idea. we need to test against both published and latest
        # to find out backwards compatibility issues.
        uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main
        uv pip install -e .

23  .github/dependabot.yml  (vendored)
@@ -1,23 +0,0 @@
# GitHub Dependabot configuration
version: 2
updates:
  # Enable version updates for GitHub Actions
  - package-ecosystem: "github-actions"
    directory: "/" # Will use the default workflow location of `.github/workflows`
    schedule:
      interval: "weekly"
      day: "saturday"
    commit-message:
      prefix: chore(github-deps)
  - package-ecosystem: "uv"
    directory: "/"
    schedule:
      interval: "weekly"
      day: "saturday"
    # ignore all non-security updates: https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#open-pull-requests-limit
    open-pull-requests-limit: 0
    labels:
      - type/dependencies
      - python
    commit-message:
      prefix: chore(python-deps)

29  .github/workflows/changelog.yml  (vendored)
@@ -1,29 +0,0 @@
name: Update Changelog

on:
  release:
    types: [published, unpublished, created, edited, deleted, released]

permissions:
  contents: read

jobs:
  generate_changelog:
    name: Generate changelog
    permissions:
      contents: write # for peter-evans/create-pull-request to create branch
      pull-requests: write # for peter-evans/create-pull-request to create a PR
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          ref: main
          fetch-depth: 0
      - run: |
          python ./scripts/gen-changelog.py
      - uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8
        with:
          title: 'docs: update CHANGELOG.md for ${{ github.ref_name }}'
          commit-message: 'docs: update CHANGELOG.md for ${{ github.ref_name }}'
          branch: create-pull-request/changelog
          signoff: true

355  .github/workflows/gha_workflow_llama_stack_tests.yml  (vendored)
@@ -1,355 +0,0 @@
name: "Run Llama-stack Tests"

on:
  #### Temporarily disable PR runs until tests run as intended within mainline.
  #TODO Add this back.
  #pull_request_target:
  #  types: ["opened"]
  #  branches:
  #    - 'main'
  #  paths:
  #    - 'llama_stack/**/*.py'
  #    - 'tests/**/*.py'

  workflow_dispatch:
    inputs:
      runner:
        description: 'GHA Runner Scale Set label to run workflow on.'
        required: true
        default: "llama-stack-gha-runner-gpu"

      checkout_reference:
        description: "The branch, tag, or SHA to checkout"
        required: true
        default: "main"

      debug:
        description: 'Run debugging steps?'
        required: false
        default: "true"

      sleep_time:
        description: '[DEBUG] sleep time for debugging'
        required: true
        default: "0"

      provider_id:
        description: 'ID of your provider'
        required: true
        default: "meta_reference"

      model_id:
        description: 'Shorthand name for target model ID (llama_3b or llama_8b)'
        required: true
        default: "llama_3b"

      model_override_3b:
        description: 'Specify shorthand model for <llama_3b> '
        required: false
        default: "Llama3.2-3B-Instruct"

      model_override_8b:
        description: 'Specify shorthand model for <llama_8b> '
        required: false
        default: "Llama3.1-8B-Instruct"

env:
  # ID used for each test's provider config
  PROVIDER_ID: "${{ inputs.provider_id || 'meta_reference' }}"

  # Path to model checkpoints within EFS volume
  MODEL_CHECKPOINT_DIR: "/data/llama"

  # Path to directory to run tests from
  TESTS_PATH: "${{ github.workspace }}/llama_stack/providers/tests"

  # Keep track of a list of model IDs that are valid to use within pytest fixture marks
  AVAILABLE_MODEL_IDs: "llama_3b llama_8b"

  # Shorthand name for model ID, used in pytest fixture marks
  MODEL_ID: "${{ inputs.model_id || 'llama_3b' }}"

  # Override the `llama_3b` / `llama_8b' models, else use the default.
  LLAMA_3B_OVERRIDE: "${{ inputs.model_override_3b || 'Llama3.2-3B-Instruct' }}"
  LLAMA_8B_OVERRIDE: "${{ inputs.model_override_8b || 'Llama3.1-8B-Instruct' }}"

  # Defines which directories in TESTS_PATH to exclude from the test loop
  EXCLUDED_DIRS: "__pycache__"

  # Defines the output xml reports generated after a test is run
  REPORTS_GEN: ""

jobs:
  execute_workflow:
    name: Execute workload on Self-Hosted GPU k8s runner
    permissions:
      pull-requests: write
    defaults:
      run:
        shell: bash
    runs-on: ${{ inputs.runner != '' && inputs.runner || 'llama-stack-gha-runner-gpu' }}
    if: always()
    steps:

      ##############################
      #### INITIAL DEBUG CHECKS ####
      ##############################
      - name: "[DEBUG] Check content of the EFS mount"
        id: debug_efs_volume
        continue-on-error: true
        if: inputs.debug == 'true'
        run: |
          echo "========= Content of the EFS mount ============="
          ls -la ${{ env.MODEL_CHECKPOINT_DIR }}

      - name: "[DEBUG] Get runner container OS information"
        id: debug_os_info
        if: ${{ inputs.debug == 'true' }}
        run: |
          cat /etc/os-release

      - name: "[DEBUG] Print environment variables"
        id: debug_env_vars
        if: ${{ inputs.debug == 'true' }}
        run: |
          echo "PROVIDER_ID = ${PROVIDER_ID}"
          echo "MODEL_CHECKPOINT_DIR = ${MODEL_CHECKPOINT_DIR}"
          echo "AVAILABLE_MODEL_IDs = ${AVAILABLE_MODEL_IDs}"
          echo "MODEL_ID = ${MODEL_ID}"
          echo "LLAMA_3B_OVERRIDE = ${LLAMA_3B_OVERRIDE}"
          echo "LLAMA_8B_OVERRIDE = ${LLAMA_8B_OVERRIDE}"
          echo "EXCLUDED_DIRS = ${EXCLUDED_DIRS}"
          echo "REPORTS_GEN = ${REPORTS_GEN}"

      ############################
      #### MODEL INPUT CHECKS ####
      ############################

      - name: "Check if env.model_id is valid"
        id: check_model_id
        run: |
          if [[ " ${AVAILABLE_MODEL_IDs[@]} " =~ " ${MODEL_ID} " ]]; then
            echo "Model ID '${MODEL_ID}' is valid."
          else
            echo "Model ID '${MODEL_ID}' is invalid. Terminating workflow."
            exit 1
          fi

      #######################
      #### CODE CHECKOUT ####
      #######################
      - name: "Checkout 'meta-llama/llama-stack' repository"
        id: checkout_repo
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          ref: ${{ inputs.branch }}

      - name: "[DEBUG] Content of the repository after checkout"
        id: debug_content_after_checkout
        if: ${{ inputs.debug == 'true' }}
        run: |
          ls -la ${GITHUB_WORKSPACE}

      ##########################################################
      ####             OPTIONAL SLEEP DEBUG                 ####
      #                                                        #
      # Use to "exec" into the test k8s POD and run tests      #
      # manually to identify what dependencies are being used. #
      #                                                        #
      ##########################################################
      - name: "[DEBUG] sleep"
        id: debug_sleep
        if: ${{ inputs.debug == 'true' && inputs.sleep_time != '' }}
        run: |
          sleep ${{ inputs.sleep_time }}

      ############################
      #### UPDATE SYSTEM PATH ####
      ############################
      - name: "Update path: execute"
        id: path_update_exec
        run: |
          # .local/bin is needed for certain libraries installed below to be recognized
          # when calling their executable to install sub-dependencies
          mkdir -p ${HOME}/.local/bin
          echo "${HOME}/.local/bin" >> "$GITHUB_PATH"

      #####################################
      #### UPDATE CHECKPOINT DIRECTORY ####
      #####################################
      - name: "Update checkpoint directory"
        id: checkpoint_update
        run: |
          echo "Checkpoint directory: ${MODEL_CHECKPOINT_DIR}/$LLAMA_3B_OVERRIDE"
          if [ "${MODEL_ID}" = "llama_3b" ] && [ -d "${MODEL_CHECKPOINT_DIR}/${LLAMA_3B_OVERRIDE}" ]; then
            echo "MODEL_CHECKPOINT_DIR=${MODEL_CHECKPOINT_DIR}/${LLAMA_3B_OVERRIDE}" >> "$GITHUB_ENV"
          elif [ "${MODEL_ID}" = "llama_8b" ] && [ -d "${MODEL_CHECKPOINT_DIR}/${LLAMA_8B_OVERRIDE}" ]; then
            echo "MODEL_CHECKPOINT_DIR=${MODEL_CHECKPOINT_DIR}/${LLAMA_8B_OVERRIDE}" >> "$GITHUB_ENV"
          else
            echo "MODEL_ID & LLAMA_*B_OVERRIDE are not a valid pairing. Terminating workflow."
            exit 1
          fi

      - name: "[DEBUG] Checkpoint update check"
        id: debug_checkpoint_update
        if: ${{ inputs.debug == 'true' }}
        run: |
          echo "MODEL_CHECKPOINT_DIR (after update) = ${MODEL_CHECKPOINT_DIR}"

      ##################################
      #### DEPENDENCY INSTALLATIONS ####
      ##################################
      - name: "Installing 'apt' required packages"
        id: install_apt
        run: |
          echo "[STEP] Installing 'apt' required packages"
          sudo apt update -y
          sudo apt install -y python3 python3-pip npm wget

      - name: "Installing packages with 'curl'"
        id: install_curl
        run: |
          curl -fsSL https://ollama.com/install.sh | sh

      - name: "Installing packages with 'wget'"
        id: install_wget
        run: |
          wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
          chmod +x Miniconda3-latest-Linux-x86_64.sh
          ./Miniconda3-latest-Linux-x86_64.sh -b install -c pytorch -c nvidia faiss-gpu=1.9.0
          # Add miniconda3 bin to system path
          echo "${HOME}/miniconda3/bin" >> "$GITHUB_PATH"

      - name: "Installing packages with 'npm'"
        id: install_npm_generic
        run: |
          sudo npm install -g junit-merge

      - name: "Installing pip dependencies"
        id: install_pip_generic
        run: |
          echo "[STEP] Installing 'llama-stack' models"
          pip install -U pip setuptools
          pip install -r requirements.txt
          pip install -e .
          pip install -U \
            torch torchvision \
            pytest pytest_asyncio \
            fairscale lm-format-enforcer \
            zmq chardet pypdf \
            pandas sentence_transformers together \
            aiosqlite
      - name: "Installing packages with conda"
        id: install_conda_generic
        run: |
          conda install -q -c pytorch -c nvidia faiss-gpu=1.9.0

      #############################################################
      #### TESTING TO BE DONE FOR BOTH PRS AND MANUAL DISPATCH ####
      #############################################################
      - name: "Run Tests: Loop"
        id: run_tests_loop
        working-directory: "${{ github.workspace }}"
        run: |
          pattern=""
          for dir in llama_stack/providers/tests/*; do
            if [ -d "$dir" ]; then
              dir_name=$(basename "$dir")
              if [[ ! " $EXCLUDED_DIRS " =~ " $dir_name " ]]; then
                for file in "$dir"/test_*.py; do
                  test_name=$(basename "$file")
                  new_file="result-${dir_name}-${test_name}.xml"
                  if torchrun $(which pytest) -s -v ${TESTS_PATH}/${dir_name}/${test_name} -m "${PROVIDER_ID} and ${MODEL_ID}" \
                    --junitxml="${{ github.workspace }}/${new_file}"; then
                    echo "Ran test: ${test_name}"
                  else
                    echo "Did NOT run test: ${test_name}"
                  fi
                  pattern+="${new_file} "
                done
              fi
            fi
          done
          echo "REPORTS_GEN=$pattern" >> "$GITHUB_ENV"

      - name: "Test Summary: Merge"
        id: test_summary_merge
        working-directory: "${{ github.workspace }}"
        run: |
          echo "Merging the following test result files: ${REPORTS_GEN}"
          # Defaults to merging them into 'merged-test-results.xml'
          junit-merge ${{ env.REPORTS_GEN }}

      ############################################
      #### AUTOMATIC TESTING ON PULL REQUESTS ####
      ############################################

      #### Run tests ####

      - name: "PR - Run Tests"
        id: pr_run_tests
        working-directory: "${{ github.workspace }}"
        if: github.event_name == 'pull_request_target'
        run: |
          echo "[STEP] Running PyTest tests at 'GITHUB_WORKSPACE' path: ${GITHUB_WORKSPACE} | path: ${{ github.workspace }}"
          # (Optional) Add more tests here.

          # Merge test results with 'merged-test-results.xml' from above.
          # junit-merge <new-test-results> merged-test-results.xml

      #### Create test summary ####

      - name: "PR - Test Summary"
        id: pr_test_summary_create
        if: github.event_name == 'pull_request_target'
        uses: test-summary/action@31493c76ec9e7aa675f1585d3ed6f1da69269a86 # v2.4
        with:
          paths: "${{ github.workspace }}/merged-test-results.xml"
          output: test-summary.md

      - name: "PR - Upload Test Summary"
        id: pr_test_summary_upload
        if: github.event_name == 'pull_request_target'
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
          name: test-summary
          path: test-summary.md

      #### Update PR request ####

      - name: "PR - Update comment"
        id: pr_update_comment
        if: github.event_name == 'pull_request_target'
        uses: thollander/actions-comment-pull-request@24bffb9b452ba05a4f3f77933840a6a841d1b32b # v3.0.1
        with:
          filePath: test-summary.md

      ########################
      #### MANUAL TESTING ####
      ########################

      #### Run tests ####

      - name: "Manual - Run Tests: Prep"
        id: manual_run_tests
        working-directory: "${{ github.workspace }}"
        if: github.event_name == 'workflow_dispatch'
        run: |
          echo "[STEP] Running PyTest tests at 'GITHUB_WORKSPACE' path: ${{ github.workspace }}"

          #TODO Use this when collection errors are resolved
          # pytest -s -v -m "${PROVIDER_ID} and ${MODEL_ID}" --junitxml="${{ github.workspace }}/merged-test-results.xml"

          # (Optional) Add more tests here.

          # Merge test results with 'merged-test-results.xml' from above.
          # junit-merge <new-test-results> merged-test-results.xml

      #### Create test summary ####

      - name: "Manual - Test Summary"
        id: manual_test_summary
        if: always() && github.event_name == 'workflow_dispatch'
        uses: test-summary/action@31493c76ec9e7aa675f1585d3ed6f1da69269a86 # v2.4
        with:
          paths: "${{ github.workspace }}/merged-test-results.xml"

26  .github/workflows/install-script-ci.yml  (vendored)
@@ -1,26 +0,0 @@
name: Installer CI

on:
  pull_request:
    paths:
      - 'install.sh'
  push:
    paths:
      - 'install.sh'
  schedule:
    - cron: '0 2 * * *'  # every day at 02:00 UTC

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
      - name: Run ShellCheck on install.sh
        run: shellcheck install.sh
  smoke-test:
    needs: lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2
      - name: Run installer end-to-end
        run: ./install.sh

132  .github/workflows/integration-auth-tests.yml  (vendored)
@@ -1,132 +0,0 @@
name: Integration Auth Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    paths:
      - 'distributions/**'
      - 'llama_stack/**'
      - 'tests/integration/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
      - '.github/workflows/integration-auth-tests.yml' # This workflow

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-matrix:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        auth-provider: [oauth2_token]
      fail-fast: false # we want to run all tests regardless of failure

    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Build Llama Stack
        run: |
          llama stack build --template ollama --image-type venv

      - name: Install minikube
        if: ${{ matrix.auth-provider == 'kubernetes' }}
        uses: medyagh/setup-minikube@cea33675329b799adccc9526aa5daccc26cd5052 # v0.0.19

      - name: Start minikube
        if: ${{ matrix.auth-provider == 'oauth2_token' }}
        run: |
          minikube start
          kubectl get pods -A

      - name: Configure Kube Auth
        if: ${{ matrix.auth-provider == 'oauth2_token' }}
        run: |
          kubectl create namespace llama-stack
          kubectl create serviceaccount llama-stack-auth -n llama-stack
          kubectl create rolebinding llama-stack-auth-rolebinding --clusterrole=admin --serviceaccount=llama-stack:llama-stack-auth -n llama-stack
          kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token
          cat <<EOF | kubectl apply -f -
          apiVersion: rbac.authorization.k8s.io/v1
          kind: ClusterRole
          metadata:
            name: allow-anonymous-openid
          rules:
          - nonResourceURLs: ["/openid/v1/jwks"]
            verbs: ["get"]
          ---
          apiVersion: rbac.authorization.k8s.io/v1
          kind: ClusterRoleBinding
          metadata:
            name: allow-anonymous-openid
          roleRef:
            apiGroup: rbac.authorization.k8s.io
            kind: ClusterRole
            name: allow-anonymous-openid
          subjects:
          - kind: User
            name: system:anonymous
            apiGroup: rbac.authorization.k8s.io
          EOF

      - name: Set Kubernetes Config
        if: ${{ matrix.auth-provider == 'oauth2_token' }}
        run: |
          echo "KUBERNETES_API_SERVER_URL=$(kubectl get --raw /.well-known/openid-configuration| jq -r .jwks_uri)" >> $GITHUB_ENV
          echo "KUBERNETES_CA_CERT_PATH=$(kubectl config view --minify -o jsonpath='{.clusters[0].cluster.certificate-authority}')" >> $GITHUB_ENV
          echo "KUBERNETES_ISSUER=$(kubectl get --raw /.well-known/openid-configuration| jq -r .issuer)" >> $GITHUB_ENV
          echo "KUBERNETES_AUDIENCE=$(kubectl create token llama-stack-auth -n llama-stack --duration=1h | cut -d. -f2 | base64 -d | jq -r '.aud[0]')" >> $GITHUB_ENV

      - name: Set Kube Auth Config and run server
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
        if: ${{ matrix.auth-provider == 'oauth2_token' }}
        run: |
          run_dir=$(mktemp -d)
          cat <<'EOF' > $run_dir/run.yaml
          version: '2'
          image_name: kube
          apis: []
          providers: {}
          server:
            port: 8321
          EOF
          yq eval '.server.auth = {"provider_type": "${{ matrix.auth-provider }}"}' -i $run_dir/run.yaml
          yq eval '.server.auth.config = {"tls_cafile": "${{ env.KUBERNETES_CA_CERT_PATH }}", "issuer": "${{ env.KUBERNETES_ISSUER }}", "audience": "${{ env.KUBERNETES_AUDIENCE }}"}' -i $run_dir/run.yaml
          yq eval '.server.auth.config.jwks = {"uri": "${{ env.KUBERNETES_API_SERVER_URL }}"}' -i $run_dir/run.yaml
          cat $run_dir/run.yaml

          nohup uv run llama stack run $run_dir/run.yaml --image-type venv > server.log 2>&1 &

      - name: Wait for Llama Stack server to be ready
        run: |
          echo "Waiting for Llama Stack server..."
          for i in {1..30}; do
            if curl -s -L -H "Authorization: Bearer $(cat llama-stack-auth-token)" http://localhost:8321/v1/health | grep -q "OK"; then
              echo "Llama Stack server is up!"
              if grep -q "Enabling authentication with provider: ${{ matrix.auth-provider }}" server.log; then
                echo "Llama Stack server is configured to use ${{ matrix.auth-provider }} auth"
                exit 0
              else
                echo "Llama Stack server is not configured to use ${{ matrix.auth-provider }} auth"
                cat server.log
                exit 1
              fi
            fi
            sleep 1
          done
          echo "Llama Stack server failed to start"
          cat server.log
          exit 1

      - name: Test auth
        run: |
          curl -s -L -H "Authorization: Bearer $(cat llama-stack-auth-token)" http://127.0.0.1:8321/v1/providers|jq

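For reference, after the heredoc and the three yq edits in the "Set Kube Auth Config and run server" step above, the generated run.yaml should look roughly like the sketch below (the angle-bracket placeholders stand for the KUBERNETES_* values exported earlier; exact key ordering as emitted by yq may differ):

    version: '2'
    image_name: kube
    apis: []
    providers: {}
    server:
      port: 8321
      auth:
        provider_type: oauth2_token
        config:
          tls_cafile: <KUBERNETES_CA_CERT_PATH>
          issuer: <KUBERNETES_ISSUER>
          audience: <KUBERNETES_AUDIENCE>
          jwks:
            uri: <KUBERNETES_API_SERVER_URL>
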
120  .github/workflows/integration-tests.yml  (vendored)
@@ -1,120 +0,0 @@
name: Integration Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    paths:
      - 'llama_stack/**'
      - 'tests/integration/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
      - '.github/workflows/integration-tests.yml' # This workflow

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-matrix:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Listing tests manually since some of them currently fail
        # TODO: generate matrix list from tests/integration when fixed
        test-type: [agents, inference, datasets, inspect, scoring, post_training, providers, tool_runtime]
        client-type: [library, http]
        python-version: ["3.10", "3.11", "3.12"]
      fail-fast: false # we want to run all tests regardless of failure

    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner
        with:
          python-version: ${{ matrix.python-version }}

      - name: Setup ollama
        uses: ./.github/actions/setup-ollama

      - name: Build Llama Stack
        run: |
          uv run llama stack build --template ollama --image-type venv

      - name: Start Llama Stack server in background
        if: matrix.client-type == 'http'
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
        run: |
          LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv --env OLLAMA_URL="http://0.0.0.0:11434" &

      - name: Wait for Llama Stack server to be ready
        if: matrix.client-type == 'http'
        run: |
          echo "Waiting for Llama Stack server..."
          for i in {1..30}; do
            if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
              echo "Llama Stack server is up!"
              exit 0
            fi
            sleep 1
          done
          echo "Llama Stack server failed to start"
          cat server.log
          exit 1

      - name: Verify Ollama status is OK
        if: matrix.client-type == 'http'
        run: |
          echo "Verifying Ollama status..."
          ollama_status=$(curl -s -L http://127.0.0.1:8321/v1/providers/ollama|jq --raw-output .health.status)
          echo "Ollama status: $ollama_status"
          if [ "$ollama_status" != "OK" ]; then
            echo "Ollama health check failed"
            exit 1
          fi

      - name: Check Storage and Memory Available Before Tests
        if: ${{ always() }}
        run: |
          free -h
          df -h

      - name: Run Integration Tests
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
          OLLAMA_URL: "http://0.0.0.0:11434"
        run: |
          if [ "${{ matrix.client-type }}" == "library" ]; then
            stack_config="ollama"
          else
            stack_config="http://localhost:8321"
          fi
          uv run pytest -s -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \
            -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
            --text-model="meta-llama/Llama-3.2-3B-Instruct" \
            --embedding-model=all-MiniLM-L6-v2

      - name: Check Storage and Memory Available After Tests
        if: ${{ always() }}
        run: |
          free -h
          df -h

      - name: Write ollama logs to file
        if: ${{ always() }}
        run: |
          sudo docker logs ollama > ollama.log

      - name: Upload all logs to artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
          name: logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.client-type }}-${{ matrix.test-type }}-${{ matrix.python-version }}
          path: |
            *.log
          retention-days: 1

45  .github/workflows/pre-commit.yml  (vendored)
@@ -1,45 +0,0 @@
name: Pre-commit

on:
  pull_request:
  push:
    branches: [main]

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  pre-commit:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Set up Python
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
        with:
          python-version: '3.11'
          cache: pip
          cache-dependency-path: |
            **/requirements*.txt
            .pre-commit-config.yaml

      - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
        env:
          SKIP: no-commit-to-branch
          RUFF_OUTPUT_FORMAT: github

      - name: Verify if there are any diff files after pre-commit
        run: |
          git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)

      - name: Verify if there are any new files after pre-commit
        run: |
          unstaged_files=$(git ls-files --others --exclude-standard)
          if [ -n "$unstaged_files" ]; then
            echo "There are uncommitted new files, run pre-commit locally and commit again"
            echo "$unstaged_files"
            exit 1
          fi

149  .github/workflows/providers-build.yml  (vendored)
@@ -1,149 +0,0 @@
name: Test Llama Stack Build

on:
  push:
    branches:
      - main
    paths:
      - 'llama_stack/cli/stack/build.py'
      - 'llama_stack/cli/stack/_build.py'
      - 'llama_stack/distribution/build.*'
      - 'llama_stack/distribution/*.sh'
      - '.github/workflows/providers-build.yml'
      - 'llama_stack/templates/**'
  pull_request:
    paths:
      - 'llama_stack/cli/stack/build.py'
      - 'llama_stack/cli/stack/_build.py'
      - 'llama_stack/distribution/build.*'
      - 'llama_stack/distribution/*.sh'
      - '.github/workflows/providers-build.yml'
      - 'llama_stack/templates/**'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  generate-matrix:
    runs-on: ubuntu-latest
    outputs:
      templates: ${{ steps.set-matrix.outputs.templates }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Generate Template List
        id: set-matrix
        run: |
          templates=$(ls llama_stack/templates/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
          echo "templates=$templates" >> "$GITHUB_OUTPUT"

  build:
    needs: generate-matrix
    runs-on: ubuntu-latest
    strategy:
      matrix:
        template: ${{ fromJson(needs.generate-matrix.outputs.templates) }}
        image-type: [venv, container]
      fail-fast: false # We want to run all jobs even if some fail

    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Print build dependencies
        run: |
          uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only

      - name: Run Llama Stack Build
        run: |
          # USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead
          # LLAMA_STACK_DIR is set to the current directory so we are building from the source
          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test

      - name: Print dependencies in the image
        if: matrix.image-type == 'venv'
        run: |
          uv pip list

  build-single-provider:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Build a single provider
        run: |
          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --image-type venv --image-name test --providers inference=remote::ollama

  build-custom-container-distribution:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Build a single provider
        run: |
          yq -i '.image_type = "container"' llama_stack/templates/starter/build.yaml
          yq -i '.image_name = "test"' llama_stack/templates/starter/build.yaml
          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config llama_stack/templates/starter/build.yaml

      - name: Inspect the container image entrypoint
        run: |
          IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
          entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
          echo "Entrypoint: $entrypoint"
          if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then
            echo "Entrypoint is not correct"
            exit 1
          fi

  build-ubi9-container-distribution:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Pin template to UBI9 base
        run: |
          yq -i '
            .image_type = "container" |
            .image_name = "ubi9-test" |
            .distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest"
          ' llama_stack/templates/starter/build.yaml

      - name: Build dev container (UBI9)
        env:
          USE_COPY_NOT_MOUNT: "true"
          LLAMA_STACK_DIR: "."
        run: |
          uv run llama stack build --config llama_stack/templates/starter/build.yaml

      - name: Inspect UBI9 image
        run: |
          IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
          entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
          echo "Entrypoint: $entrypoint"
          if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then
            echo "Entrypoint is not correct"
            exit 1
          fi

          echo "Checking /etc/os-release in $IMAGE_ID"
          docker run --rm --entrypoint sh "$IMAGE_ID" -c \
            'source /etc/os-release && echo "$ID"' \
            | grep -qE '^(rhel|ubi)$' \
            || { echo "Base image is not UBI 9!"; exit 1; }

25  .github/workflows/semantic-pr.yml  (vendored)
@@ -1,25 +0,0 @@
name: Check semantic PR titles

on:
  pull_request_target:
    types:
      - opened
      - edited
      - reopened
      - synchronize

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  title-check:
    runs-on: ubuntu-latest
    steps:
      - name: Check PR Title's semantic conformance
        uses: amannn/action-semantic-pull-request@0723387faaf9b38adef4775cd42cfd5155ed6017 # v5.5.3
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

45  .github/workflows/stale_bot.yml  (vendored)
@@ -1,45 +0,0 @@
name: Close stale issues and PRs

on:
  schedule:
    - cron: '0 0 * * *' # every day at midnight

env:
  LC_ALL: en_US.UTF-8

defaults:
  run:
    shell: bash

permissions:
  contents: read

jobs:
  stale:
    permissions:
      issues: write
      pull-requests: write
    runs-on: ubuntu-latest
    steps:
      - name: Stale Action
        uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
        with:
          stale-issue-label: 'stale'
          stale-issue-message: >
            This issue has been automatically marked as stale because it has not had activity within 60 days.
            It will be automatically closed if no further activity occurs within 30 days.
          close-issue-message: >
            This issue has been automatically closed due to inactivity.
            Please feel free to reopen if you feel it is still relevant!
          days-before-issue-stale: 60
          days-before-issue-close: 30
          stale-pr-label: 'stale'
          stale-pr-message: >
            This pull request has been automatically marked as stale because it has not had activity within 60 days.
            It will be automatically closed if no further activity occurs within 30 days.
          close-pr-message: >
            This pull request has been automatically closed due to inactivity.
            Please feel free to reopen if you intend to continue working on it!
          days-before-pr-stale: 60
          days-before-pr-close: 30
          operations-per-run: 300

71  .github/workflows/test-external-providers.yml  (vendored)
@@ -1,71 +0,0 @@
name: Test External Providers

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    paths:
      - 'llama_stack/**'
      - 'tests/integration/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
      - '.github/workflows/test-external-providers.yml' # This workflow

jobs:
  test-external-providers:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        image-type: [venv]
        # We don't do container yet, it's tricky to install a package from the host into the
        # container and point 'uv pip install' to the correct path...
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Apply image type to config file
        run: |
          yq -i '.image_type = "${{ matrix.image-type }}"' tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml
          cat tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml

      - name: Setup directory for Ollama custom provider
        run: |
          mkdir -p tests/external-provider/llama-stack-provider-ollama/src/
          cp -a llama_stack/providers/remote/inference/ollama/ tests/external-provider/llama-stack-provider-ollama/src/llama_stack_provider_ollama

      - name: Create provider configuration
        run: |
          mkdir -p /home/runner/.llama/providers.d/remote/inference
          cp tests/external-provider/llama-stack-provider-ollama/custom_ollama.yaml /home/runner/.llama/providers.d/remote/inference/custom_ollama.yaml

      - name: Build distro from config file
        run: |
          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external-provider/llama-stack-provider-ollama/custom-distro.yaml

      - name: Start Llama Stack server in background
        if: ${{ matrix.image-type }} == 'venv'
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
        run: |
          uv run pip list
          nohup uv run --active llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &

      - name: Wait for Llama Stack server to be ready
        run: |
          for i in {1..30}; do
            if ! grep -q "remote::custom_ollama from /home/runner/.llama/providers.d/remote/inference/custom_ollama.yaml" server.log; then
              echo "Waiting for Llama Stack server to load the provider..."
              sleep 1
            else
              echo "Provider loaded"
              exit 0
            fi
          done
          echo "Provider failed to load"
          cat server.log
          exit 1

69  .github/workflows/tests.yml  (vendored)
@@ -1,69 +0,0 @@
name: auto-tests

on:
  # pull_request:
  workflow_dispatch:
    inputs:
      commit_sha:
        description: 'Specific Commit SHA to trigger on'
        required: false
        default: $GITHUB_SHA # default to the last commit of $GITHUB_REF branch

jobs:
  test-llama-stack-as-library:
    runs-on: ubuntu-latest
    env:
      TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
      FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
      TAVILY_SEARCH_API_KEY: ${{ secrets.TAVILY_SEARCH_API_KEY }}
    strategy:
      matrix:
        provider: [fireworks, together]
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          ref: ${{ github.event.inputs.commit_sha }}

      - name: Echo commit SHA
        run: |
          echo "Triggered on commit SHA: ${{ github.event.inputs.commit_sha }}"
          git rev-parse HEAD

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt pytest
          pip install -e .

      - name: Build providers
        run: |
          llama stack build --template ${{ matrix.provider }} --image-type venv

      - name: Install the latest llama-stack-client & llama-models packages
        run: |
          pip install -e git+https://github.com/meta-llama/llama-stack-client-python.git#egg=llama-stack-client
          pip install -e git+https://github.com/meta-llama/llama-models.git#egg=llama-models

      - name: Run client-sdk test
        working-directory: "${{ github.workspace }}"
        env:
          REPORT_OUTPUT: md_report.md
        shell: bash
        run: |
          pip install --upgrade pytest-md-report
          echo "REPORT_FILE=${REPORT_OUTPUT}" >> "$GITHUB_ENV"

          export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
          LLAMA_STACK_CONFIG=./llama_stack/templates/${{ matrix.provider }}/run.yaml pytest --md-report --md-report-verbose=1 ./tests/client-sdk/inference/ --md-report-output "$REPORT_OUTPUT"

      - name: Output reports to the job summary
        if: always()
        shell: bash
        run: |
          if [ -f "$REPORT_FILE" ]; then
            echo "<details><summary> Test Report for ${{ matrix.provider }} </summary>" >> $GITHUB_STEP_SUMMARY
            echo "" >> $GITHUB_STEP_SUMMARY
            cat "$REPORT_FILE" >> $GITHUB_STEP_SUMMARY
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "</details>" >> $GITHUB_STEP_SUMMARY
          fi

52  .github/workflows/unit-tests.yml  (vendored)
@@ -1,52 +0,0 @@
name: Unit Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    paths:
      - 'llama_stack/**'
      - 'tests/unit/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
      - '.github/workflows/unit-tests.yml' # This workflow
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  unit-tests:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python:
          - "3.10"
          - "3.11"
          - "3.12"
          - "3.13"
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Run unit tests
        run: |
          PYTHON_VERSION=${{ matrix.python }} ./scripts/unit-tests.sh --cov=llama_stack --junitxml=pytest-report-${{ matrix.python }}.xml --cov-report=html:htmlcov-${{ matrix.python }}

      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
          name: test-results-${{ matrix.python }}
          path: |
            .pytest_cache/
            pytest-report-${{ matrix.python }}.xml
            htmlcov-${{ matrix.python }}/
          retention-days: 7

68  .github/workflows/update-readthedocs.yml  (vendored)
@@ -1,68 +0,0 @@
name: Update ReadTheDocs

on:
  workflow_dispatch:
    inputs:
      branch:
        description: 'RTD version to update'
        required: false
        default: 'latest'
  push:
    branches:
      - main
    paths:
      - 'docs/**'
      - 'pyproject.toml'
      - '.github/workflows/update-readthedocs.yml'
    tags:
      - '*'
  pull_request:
    branches:
      - main
    paths:
      - 'docs/**'
      - 'pyproject.toml'
      - '.github/workflows/update-readthedocs.yml'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  update-readthedocs:
    runs-on: ubuntu-latest
    env:
      TOKEN: ${{ secrets.READTHEDOCS_TOKEN }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install dependencies
        uses: ./.github/actions/setup-runner

      - name: Build HTML
        run: |
          cd docs
          uv run make html

      - name: Trigger ReadTheDocs build
        if: github.event_name != 'pull_request'
        run: |
          if [ -z "$TOKEN" ]; then
            echo "READTHEDOCS_TOKEN is not set"
            exit 1
          fi

          response=$(curl -X POST \
            -H "Content-Type: application/json" \
            -d "{
                  \"token\": \"$TOKEN\",
                  \"version\": \"$GITHUB_REF_NAME\"
                }" \
            https://readthedocs.org/api/v2/webhook/llama-stack/289768/)

          echo "Response: $response"
          if [ $(echo $response | jq -r '.build_triggered') != 'true' ]; then
            echo "Failed to trigger ReadTheDocs build"
            exit 1
          fi

26  .gitignore  (vendored)
@@ -1,26 +0,0 @@
.env
__pycache__
dist
*.egg-info
dev_requirements.txt
build
.DS_Store
llama_stack/configs/*
.cursor/
xcuserdata/
*.hmap
.DS_Store
.build/
Package.resolved
*.pte
*.ipynb_checkpoints*
.idea
.venv/
.vscode
_build
docs/src
pyrightconfig.json
venv/
pytest-report.xml
.coverage
.python-version

@@ -1,118 +0,0 @@
exclude: 'build/'

default_language_version:
  python: python3

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0  # Latest stable version
    hooks:
      - id: check-merge-conflict
        args: ['--assume-in-merge']
      - id: trailing-whitespace
        exclude: '\.py$'  # Exclude Python files as Ruff already handles them
      - id: check-added-large-files
        args: ['--maxkb=1000']
      - id: end-of-file-fixer
        exclude: '^(.*\.svg)$'
      - id: no-commit-to-branch
      - id: check-yaml
        args: ["--unsafe"]
      - id: detect-private-key
      - id: requirements-txt-fixer
      - id: mixed-line-ending
        args: [--fix=lf]  # Forces to replace line ending by LF (line feed)
      - id: check-executables-have-shebangs
      - id: check-json
      - id: check-shebang-scripts-are-executable
      - id: check-symlinks
      - id: check-toml

  - repo: https://github.com/Lucas-C/pre-commit-hooks
    rev: v1.5.4
    hooks:
      - id: insert-license
        files: \.py$|\.sh$
        args:
          - --license-filepath
          - docs/license_header.txt

  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.9.4
    hooks:
      - id: ruff
        args: [ --fix ]
        exclude: ^llama_stack/strong_typing/.*$
      - id: ruff-format

  - repo: https://github.com/adamchainz/blacken-docs
    rev: 1.19.0
    hooks:
      - id: blacken-docs
        additional_dependencies:
          - black==24.3.0

  - repo: https://github.com/astral-sh/uv-pre-commit
    rev: 0.7.8
    hooks:
      - id: uv-lock
      - id: uv-export
        args: [
          "--frozen",
          "--no-hashes",
          "--no-emit-project",
          "--no-default-groups",
          "--output-file=requirements.txt"
        ]

  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v1.15.0
    hooks:
      - id: mypy
        additional_dependencies:
          - uv==0.6.2
          - mypy
          - pytest
          - rich
          - types-requests
          - pydantic
        pass_filenames: false

  # - repo: https://github.com/tcort/markdown-link-check
  #   rev: v3.11.2
  #   hooks:
  #     - id: markdown-link-check
  #       args: ['--quiet']

  - repo: local
    hooks:
      - id: distro-codegen
        name: Distribution Template Codegen
        additional_dependencies:
          - uv==0.7.8
        entry: uv run --group codegen ./scripts/distro_codegen.py
        language: python
        pass_filenames: false
        require_serial: true
        files: ^llama_stack/templates/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
      - id: openapi-codegen
        name: API Spec Codegen
        additional_dependencies:
          - uv==0.7.8
        entry: sh -c 'uv run ./docs/openapi_generator/run_openapi_generator.sh > /dev/null'
        language: python
        pass_filenames: false
        require_serial: true
        files: ^llama_stack/apis/|^docs/openapi_generator/
      - id: check-workflows-use-hashes
        name: Check GitHub Actions use SHA-pinned actions
        entry: ./scripts/check-workflows-use-hashes.sh
        language: system
        pass_filenames: false
        require_serial: true
        always_run: true
        files: ^\.github/workflows/.*\.ya?ml$

ci:
  autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
  autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate

25
.readthedocs.yaml
@@ -1,25 +0,0 @@
# .readthedocs.yaml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Build documentation in the "docs/" directory with Sphinx
sphinx:
  configuration: docs/source/conf.py

# Set the OS, Python version and other tools you might need
build:
  os: ubuntu-22.04
  tools:
    python: "3.12"
  jobs:
    pre_create_environment:
      - asdf plugin add uv
      - asdf install uv latest
      - asdf global uv latest
    create_environment:
      - uv venv "${READTHEDOCS_VIRTUALENV_PATH}"
    install:
      - UV_PROJECT_ENVIRONMENT="${READTHEDOCS_VIRTUALENV_PATH}" uv sync --frozen --group docs
531
CHANGELOG.md
@@ -1,531 +0,0 @@
# Changelog

# v0.2.10.1
Published on: 2025-06-06T20:11:02Z

## Highlights
* ChromaDB provider fix

---

# v0.2.10
Published on: 2025-06-05T23:21:45Z

## Highlights

* OpenAI-compatible embeddings API
* OpenAI-compatible Files API
* Postgres support in starter distro
* Enable ingestion of precomputed embeddings
* Full multi-turn support in Responses API
* Fine-grained access control policy

---

# v0.2.9
Published on: 2025-05-30T20:01:56Z

## Highlights
* Added initial streaming support in Responses API
* UI view for Responses
* Postgres inference store support

---

# v0.2.8
Published on: 2025-05-27T21:03:47Z

# Release v0.2.8

## Highlights

* Server-side MCP with auth firewalls now works in the Stack - both for Agents and Responses
* Get chat completions APIs and UI to show chat completions
* Enable keyword search for sqlite-vec

---

# v0.2.7
Published on: 2025-05-16T20:38:10Z

## Highlights

This is a small update. But a couple highlights:

* feat: function tools in OpenAI Responses by @bbrowning in https://github.com/meta-llama/llama-stack/pull/2094, getting closer to ready. Streaming is the next missing piece.
* feat: Adding support for customizing chunk context in RAG insertion and querying by @franciscojavierarceo in https://github.com/meta-llama/llama-stack/pull/2134
* feat: scaffolding for Llama Stack UI by @ehhuang in https://github.com/meta-llama/llama-stack/pull/2149, more to come in the coming releases.

---

# v0.2.6
Published on: 2025-05-12T18:06:52Z

---

# v0.2.5
Published on: 2025-05-04T20:16:49Z

---

# v0.2.4
Published on: 2025-04-29T17:26:01Z

## Highlights

* One-liner to install and run Llama Stack yay! by @reluctantfuturist in https://github.com/meta-llama/llama-stack/pull/1383
* support for NVIDIA NeMo datastore by @raspawar in https://github.com/meta-llama/llama-stack/pull/1852
* (yuge!) Kubernetes authentication by @leseb in https://github.com/meta-llama/llama-stack/pull/1778
* (yuge!) OpenAI Responses API by @bbrowning in https://github.com/meta-llama/llama-stack/pull/1989
* add api.llama provider, llama-guard-4 model by @ashwinb in https://github.com/meta-llama/llama-stack/pull/2058

---

# v0.2.3
Published on: 2025-04-25T22:46:21Z

## Highlights

* OpenAI compatible inference endpoints and client-SDK support. `client.chat.completions.create()` now works.
* significant improvements and functionality added to the nVIDIA distribution
* many improvements to the test verification suite.
* new inference providers: Ramalama, IBM WatsonX
* many improvements to the Playground UI

---

# v0.2.2
Published on: 2025-04-13T01:19:49Z

## Main changes

- Bring Your Own Provider (@leseb) - use out-of-tree provider code to execute the distribution server
- OpenAI compatible inference API in progress (@bbrowning)
- Provider verifications (@ehhuang)
- Many updates and fixes to playground
- Several llama4 related fixes

---

# v0.2.1
Published on: 2025-04-05T23:13:00Z

---

# v0.2.0
Published on: 2025-04-05T19:04:29Z

## Llama 4 Support

Checkout more at https://www.llama.com

---

# v0.1.9
Published on: 2025-03-29T00:52:23Z

### Build and Test Agents
* Agents: Entire document context with attachments
* RAG: Documentation with sqlite-vec faiss comparison
* Getting started: Fixes to getting started notebook.

### Agent Evals and Model Customization
* (**New**) Post-training: Add nemo customizer

### Better Engineering
* Moved sqlite-vec to non-blocking calls
* Don't return a payload on file delete

---

# v0.1.8
Published on: 2025-03-24T01:28:50Z

# v0.1.8 Release Notes

### Build and Test Agents
* Safety: Integrated NVIDIA as a safety provider.
* VectorDB: Added Qdrant as an inline provider.
* Agents: Added support for multiple tool groups in agents.
* Agents: Simplified imports for Agents in client package

### Agent Evals and Model Customization
* Introduced DocVQA and IfEval benchmarks.

### Deploying and Monitoring Agents
* Introduced a Containerfile and image workflow for the Playground.
* Implemented support for Bearer (API Key) authentication.
* Added attribute-based access control for resources.
* Fixes on docker deployments: use --pull always and standardized the default port to 8321
* Deprecated: /v1/inspect/providers use /v1/providers/ instead

### Better Engineering
* Consolidated scripts under the ./scripts directory.
* Addressed mypy violations in various modules.
* Added Dependabot scans for Python dependencies.
* Implemented a scheduled workflow to update the changelog automatically.
* Enforced concurrency to reduce CI loads.

### New Contributors
* @cmodi-meta made their first contribution in https://github.com/meta-llama/llama-stack/pull/1650
* @jeffmaury made their first contribution in https://github.com/meta-llama/llama-stack/pull/1671
* @derekhiggins made their first contribution in https://github.com/meta-llama/llama-stack/pull/1698
* @Bobbins228 made their first contribution in https://github.com/meta-llama/llama-stack/pull/1745

**Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.1.7...v0.1.8

---

# v0.1.7
Published on: 2025-03-14T22:30:51Z

## 0.1.7 Release Notes

### Build and Test Agents
* Inference: ImageType is now refactored to LlamaStackImageType
* Inference: Added tests to measure TTFT
* Inference: Bring back usage metrics
* Agents: Added endpoint for get agent, list agents and list sessions
* Agents: Automated conversion of type hints in client tool for lite llm format
* Agents: Deprecated ToolResponseMessage in agent.resume API
* Added Provider API for listing and inspecting provider info

### Agent Evals and Model Customization
* Eval: Added new eval benchmarks Math 500 and BFCL v3
* Deploy and Monitoring of Agents
* Telemetry: Fix tracing to work across coroutines

### Better Engineering
* Display code coverage for unit tests
* Updated call sites (inference, tool calls, agents) to move to async non blocking calls
* Unit tests also run on Python 3.11, 3.12, and 3.13
* Added ollama inference to Integration tests CI
* Improved documentation across examples, testing, CLI, updated providers table )

---

# v0.1.6
Published on: 2025-03-08T04:35:08Z

## 0.1.6 Release Notes

### Build and Test Agents
* Inference: Fixed support for inline vllm provider
* (**New**) Agent: Build & Monitor Agent Workflows with Llama Stack + Anthropic's Best Practice [Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb)
* (**New**) Agent: Revamped agent [documentation](https://llama-stack.readthedocs.io/en/latest/building_applications/agent.html) with more details and examples
* Agent: Unify tools and Python SDK Agents API
* Agent: AsyncAgent Python SDK wrapper supporting async client tool calls
* Agent: Support python functions without @client_tool decorator as client tools
* Agent: deprecation for allow_resume_turn flag, and remove need to specify tool_prompt_format
* VectorIO: MilvusDB support added

### Agent Evals and Model Customization
* (**New**) Agent: Llama Stack RAG Lifecycle [Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_RAG_Lifecycle.ipynb)
* Eval: Documentation for eval, scoring, adding new benchmarks
* Eval: Distribution template to run benchmarks on llama & non-llama models
* Eval: Ability to register new custom LLM-as-judge scoring functions
* (**New**) Looking for contributors for open benchmarks. See [documentation](https://llama-stack.readthedocs.io/en/latest/references/evals_reference/index.html#open-benchmark-contributing-guide) for details.

### Deploy and Monitoring of Agents
* Better support for different log levels across all components for better monitoring

### Better Engineering
* Enhance OpenAPI spec to include Error types across all APIs
* Moved all tests to /tests and created unit tests to run on each PR
* Removed all dependencies on llama-models repo

---

# v0.1.5.1
Published on: 2025-02-28T22:37:44Z

## 0.1.5.1 Release Notes
* Fixes for security risk in https://github.com/meta-llama/llama-stack/pull/1327 and https://github.com/meta-llama/llama-stack/pull/1328

**Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.1.5...v0.1.5.1

---

# v0.1.5
Published on: 2025-02-28T18:14:01Z

## 0.1.5 Release Notes
### Build Agents
* Inference: Support more non-llama models (openai, anthropic, gemini)
* Inference: Can use the provider's model name in addition to the HF alias
* Inference: Fixed issues with calling tools that weren't specified in the prompt
* RAG: Improved system prompt for RAG and no more need for hard-coded rag-tool calling
* Embeddings: Added support for Nemo retriever embedding models
* Tools: Added support for MCP tools in Ollama Distribution
* Distributions: Added new Groq distribution

### Customize Models
* Save post-trained checkpoint in SafeTensor format to allow Ollama inference provider to use the post-trained model

### Monitor agents
* More comprehensive logging of agent steps including client tools
* Telemetry inputs/outputs are now structured and queryable
* Ability to retrieve agents session, turn, step by ids

### Better Engineering
* Moved executorch Swift code out of this repo into the llama-stack-client-swift repo, similar to kotlin
* Move most logging to use logger instead of prints
* Completed text /chat-completion and /completion tests

---

# v0.1.4
Published on: 2025-02-25T00:02:43Z

## v0.1.4 Release Notes
Here are the key changes coming as part of this release:

### Build and Test Agents
* Inference: Added support for non-llama models
* Inference: Added option to list all downloaded models and remove models
* Agent: Introduce new api agents.resume_turn to include client side tool execution in the same turn
* Agent: AgentConfig introduces new variable “tool_config” that allows for better tool configuration and system prompt overrides
* Agent: Added logging for agent step start and completion times
* Agent: Added support for logging for tool execution metadata
* Embedding: Updated /inference/embeddings to support asymmetric models, truncation and variable sized outputs
* Embedding: Updated embedding models for Ollama, Together, and Fireworks with available defaults
* VectorIO: Improved performance of sqlite-vec using chunked writes
### Agent Evals and Model Customization
* Deprecated api /eval-tasks. Use /eval/benchmark instead
* Added CPU training support for TorchTune
### Deploy and Monitoring of Agents
* Consistent view of client and server tool calls in telemetry
### Better Engineering
* Made tests more data-driven for consistent evaluation
* Fixed documentation links and improved API reference generation
* Various small fixes for build scripts and system reliability

---

# v0.1.3
Published on: 2025-02-14T20:24:32Z

## v0.1.3 Release

Here are some key changes that are coming as part of this release.

### Build and Test Agents
Streamlined the initial development experience
- Added support for llama stack run --image-type venv
- Enhanced vector store options with new sqlite-vec provider and improved Qdrant integration
- vLLM improvements for tool calling and logprobs
- Better handling of sporadic code_interpreter tool calls

### Agent Evals
Better benchmarking and Agent performance assessment
- Renamed eval API /eval-task to /benchmarks
- Improved documentation and notebooks for RAG and evals

### Deploy and Monitoring of Agents
Improved production readiness
- Added usage metrics collection for chat completions
- CLI improvements for provider information
- Improved error handling and system reliability
- Better model endpoint handling and accessibility
- Improved signal handling on distro server

### Better Engineering
Infrastructure and code quality improvements
- Faster text-based chat completion tests
- Improved testing for non-streaming agent apis
- Standardized import formatting with ruff linter
- Added conventional commits standard
- Fixed documentation parsing issues

---

# v0.1.2
Published on: 2025-02-07T22:06:49Z

# TL;DR
- Several stabilizations to development flows after the switch to `uv`
- Migrated CI workflows to new OSS repo - [llama-stack-ops](https://github.com/meta-llama/llama-stack-ops)
- Added automated rebuilds for ReadTheDocs
- Llama Stack server supports HTTPS
- Added system prompt overrides support
- Several bug fixes and improvements to documentation (check out Kubernetes deployment guide by @terrytangyuan )

---

# v0.1.1
Published on: 2025-02-02T02:29:24Z

A bunch of small / big improvements everywhere including support for Windows, switching to `uv` and many provider improvements.

---

# v0.1.0
Published on: 2025-01-24T17:47:47Z

We are excited to announce a stable API release of Llama Stack, which enables developers to build RAG applications and Agents using tools and safety shields, monitor and those agents with telemetry, and evaluate the agent with scoring functions.

## Context
GenAI application developers need more than just an LLM - they need to integrate tools, connect with their data sources, establish guardrails, and ground the LLM responses effectively. Currently, developers must piece together various tools and APIs, complicating the development lifecycle and increasing costs. The result is that developers are spending more time on these integrations rather than focusing on the application logic itself. The bespoke coupling of components also makes it challenging to adopt state-of-the-art solutions in the rapidly evolving GenAI space. This is particularly difficult for open models like Llama, as best practices are not widely established in the open.

Llama Stack was created to provide developers with a comprehensive and coherent interface that simplifies AI application development and codifies best practices across the Llama ecosystem. Since our launch in September 2024, we have seen a huge uptick in interest in Llama Stack APIs by both AI developers and from partners building AI services with Llama models. Partners like Nvidia, Fireworks, and Ollama have collaborated with us to develop implementations across various APIs, including inference, memory, and safety.

With Llama Stack, you can easily build a RAG agent which can also search the web, do complex math, and custom tool calling. You can use telemetry to inspect those traces, and convert telemetry into evals datasets. And with Llama Stack’s plugin architecture and prepackage distributions, you choose to run your agent anywhere - in the cloud with our partners, deploy your own environment using virtualenv, conda, or Docker, operate locally with Ollama, or even run on mobile devices with our SDKs. Llama Stack offers unprecedented flexibility while also simplifying the developer experience.

## Release
After iterating on the APIs for the last 3 months, today we’re launching a stable release (V1) of the Llama Stack APIs and the corresponding llama-stack server and client packages(v0.1.0). We now have automated tests for providers. These tests make sure that all provider implementations are verified. Developers can now easily and reliably select distributions or providers based on their specific requirements.

There are example standalone apps in llama-stack-apps.

## Key Features of this release

- **Unified API Layer**
  - Inference: Run LLM models
  - RAG: Store and retrieve knowledge for RAG
  - Agents: Build multi-step agentic workflows
  - Tools: Register tools that can be called by the agent
  - Safety: Apply content filtering and safety policies
  - Evaluation: Test model and agent quality
  - Telemetry: Collect and analyze usage data and complex agentic traces
  - Post Training ( Coming Soon ): Fine tune models for specific use cases

- **Rich Provider Ecosystem**
  - Local Development: Meta's Reference, Ollama
  - Cloud: Fireworks, Together, Nvidia, AWS Bedrock, Groq, Cerebras
  - On-premises: Nvidia NIM, vLLM, TGI, Dell-TGI
  - On-device: iOS and Android support

- **Built for Production**
  - Pre-packaged distributions for common deployment scenarios
  - Backwards compatibility across model versions
  - Comprehensive evaluation capabilities
  - Full observability and monitoring

- **Multiple developer interfaces**
  - CLI: Command line interface
  - Python SDK
  - Swift iOS SDK
  - Kotlin Android SDK

- **Sample llama stack applications**
  - Python
  - iOS
  - Android

---

# v0.1.0rc12
Published on: 2025-01-22T22:24:01Z

---

# v0.0.63
Published on: 2024-12-18T07:17:43Z

A small but important bug-fix release to update the URL datatype for the client-SDKs. The issue affected multimodal agentic turns especially.

**Full Changelog**: https://github.com/meta-llama/llama-stack/compare/v0.0.62...v0.0.63

---

# v0.0.62
Published on: 2024-12-18T02:39:43Z

---

# v0.0.61
Published on: 2024-12-10T20:50:33Z

---

# v0.0.55
Published on: 2024-11-23T17:14:07Z

---

# v0.0.54
Published on: 2024-11-22T00:36:09Z

---

# v0.0.53
Published on: 2024-11-20T22:18:00Z

🚀 Initial Release Notes for Llama Stack!

### Added
- Resource-oriented design for models, shields, memory banks, datasets and eval tasks
- Persistence for registered objects with distribution
- Ability to persist memory banks created for FAISS
- PostgreSQL KVStore implementation
- Environment variable placeholder support in run.yaml files
- Comprehensive Zero-to-Hero notebooks and quickstart guides
- Support for quantized models in Ollama
- Vision models support for Together, Fireworks, Meta-Reference, and Ollama, and vLLM
- Bedrock distribution with safety shields support
- Evals API with task registration and scoring functions
- MMLU and SimpleQA benchmark scoring functions
- Huggingface dataset provider integration for benchmarks
- Support for custom dataset registration from local paths
- Benchmark evaluation CLI tools with visualization tables
- RAG evaluation scoring functions and metrics
- Local persistence for datasets and eval tasks

### Changed
- Split safety into distinct providers (llama-guard, prompt-guard, code-scanner)
- Changed provider naming convention (`impls` → `inline`, `adapters` → `remote`)
- Updated API signatures for dataset and eval task registration
- Restructured folder organization for providers
- Enhanced Docker build configuration
- Added version prefixing for REST API routes
- Enhanced evaluation task registration workflow
- Improved benchmark evaluation output formatting
- Restructured evals folder organization for better modularity

### Removed
- `llama stack configure` command

---
80
CODE_OF_CONDUCT.md
@@ -1,80 +0,0 @@
# Code of Conduct

## Our Pledge

In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to make participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.

## Our Standards

Examples of behavior that contributes to creating a positive environment
include:

* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members

Examples of unacceptable behavior by participants include:

* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting

## Our Responsibilities

Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.

Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.

## Scope

This Code of Conduct applies within all project spaces, and it also applies when
an individual is representing the project or its community in public spaces.
Examples of representing a project or community include using an official
project e-mail address, posting via an official social media account, or acting
as an appointed representative at an online or offline event. Representation of
a project may be further defined and clarified by project maintainers.

This Code of Conduct also applies outside the project spaces when there is a
reasonable belief that an individual's behavior may have a negative impact on
the project or its community.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at <opensource-conduct@meta.com>. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.

Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html

[homepage]: https://www.contributor-covenant.org

For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq
189
CONTRIBUTING.md
@@ -1,189 +0,0 @@
# Contributing to Llama-Stack
We want to make contributing to this project as easy and transparent as
possible.

## Discussions -> Issues -> Pull Requests

We actively welcome your pull requests. However, please read the following. This is heavily inspired by [Ghostty](https://github.com/ghostty-org/ghostty/blob/main/CONTRIBUTING.md).

If in doubt, please open a [discussion](https://github.com/meta-llama/llama-stack/discussions); we can always convert that to an issue later.

**I'd like to contribute!**

All issues are actionable (please report if they are not.) Pick one and start working on it. Thank you.
If you need help or guidance, comment on the issue. Issues that are extra friendly to new contributors are tagged with "contributor friendly".

**I have a bug!**

1. Search the issue tracker and discussions for similar issues.
2. If you don't have steps to reproduce, open a discussion.
3. If you have steps to reproduce, open an issue.

**I have an idea for a feature!**

1. Open a discussion.

**I've implemented a feature!**

1. If there is an issue for the feature, open a pull request.
2. If there is no issue, open a discussion and link to your branch.

**I have a question!**

1. Open a discussion or use [Discord](https://discord.gg/llama-stack).

**Opening a Pull Request**

1. Fork the repo and create your branch from `main`.
2. If you've changed APIs, update the documentation.
3. Ensure the test suite passes.
4. Make sure your code lints using `pre-commit`.
5. If you haven't already, complete the Contributor License Agreement ("CLA").
6. Ensure your pull request follows the [conventional commits format](https://www.conventionalcommits.org/en/v1.0.0/).

## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Meta's open source projects.

Complete your CLA here: <https://code.facebook.com/cla>

## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.

Meta has a [bounty program](http://facebook.com/whitehat/info) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.

## Set up your development environment

We use [uv](https://github.com/astral-sh/uv) to manage python dependencies and virtual environments.
You can install `uv` by following this [guide](https://docs.astral.sh/uv/getting-started/installation/).

You can install the dependencies by running:

```bash
cd llama-stack
uv sync --extra dev
uv pip install -e .
source .venv/bin/activate
```

> [!NOTE]
> You can use a specific version of Python with `uv` by adding the `--python <version>` flag (e.g. `--python 3.11`)
> Otherwise, `uv` will automatically select a Python version according to the `requires-python` section of the `pyproject.toml`.
> For more info, see the [uv docs around Python versions](https://docs.astral.sh/uv/concepts/python-versions/).

Note that you can create a dotenv file `.env` that includes necessary environment variables:
```
LLAMA_STACK_BASE_URL=http://localhost:8321
LLAMA_STACK_CLIENT_LOG=debug
LLAMA_STACK_PORT=8321
LLAMA_STACK_CONFIG=<provider-name>
TAVILY_SEARCH_API_KEY=
BRAVE_SEARCH_API_KEY=
```

And then use this dotenv file when running client SDK tests via the following:
```bash
uv run --env-file .env -- pytest -v tests/integration/inference/test_text_inference.py --text-model=meta-llama/Llama-3.1-8B-Instruct
```

## Pre-commit Hooks

We use [pre-commit](https://pre-commit.com/) to run linting and formatting checks on your code. You can install the pre-commit hooks by running:

```bash
uv run pre-commit install
```

After that, pre-commit hooks will run automatically before each commit.

Alternatively, if you don't want to install the pre-commit hooks, you can run the checks manually by running:

```bash
uv run pre-commit run --all-files
```

> [!CAUTION]
> Before pushing your changes, make sure that the pre-commit hooks have passed successfully.

## Running tests

You can find the Llama Stack testing documentation here [here](tests/README.md).

## Adding a new dependency to the project

To add a new dependency to the project, you can use the `uv` command. For example, to add `foo` to the project, you can run:

```bash
uv add foo
uv sync
```

## Coding Style

* Comments should provide meaningful insights into the code. Avoid filler comments that simply
describe the next step, as they create unnecessary clutter, same goes for docstrings.
* Prefer comments to clarify surprising behavior and/or relationships between parts of the code
rather than explain what the next line of code does.
* Catching exceptions, prefer using a specific exception type rather than a broad catch-all like
`Exception`.
* Error messages should be prefixed with "Failed to ..."
* 4 spaces for indentation rather than tab
* When using `# noqa` to suppress a style or linter warning, include a comment explaining the
justification for bypassing the check.
* When using `# type: ignore` to suppress a mypy warning, include a comment explaining the
justification for bypassing the check.
* Don't use unicode characters in the codebase. ASCII-only is preferred for compatibility or
readability reasons.

## Common Tasks

Some tips about common tasks you work on while contributing to Llama Stack:

### Using `llama stack build`

Building a stack image (conda / docker) will use the production version of the `llama-stack` and `llama-stack-client` packages. If you are developing with a llama-stack repository checked out and need your code to be reflected in the stack image, set `LLAMA_STACK_DIR` and `LLAMA_STACK_CLIENT_DIR` to the appropriate checked out directories when running any of the `llama` CLI commands.

Example:
```bash
cd work/
git clone https://github.com/meta-llama/llama-stack.git
git clone https://github.com/meta-llama/llama-stack-client-python.git
cd llama-stack
LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --template <...>
```

### Updating Provider Configurations

If you have made changes to a provider's configuration in any form (introducing a new config key, or changing models, etc.), you should run `./scripts/distro_codegen.py` to re-generate various YAML files as well as the documentation. You should not change `docs/source/.../distributions/` files manually as they are auto-generated.

### Building the Documentation

If you are making changes to the documentation at [https://llama-stack.readthedocs.io/en/latest/](https://llama-stack.readthedocs.io/en/latest/), you can use the following command to build the documentation and preview your changes. You will need [Sphinx](https://www.sphinx-doc.org/en/master/) and the readthedocs theme.

```bash
# This rebuilds the documentation pages.
uv run --group docs make -C docs/ html

# This will start a local server (usually at http://127.0.0.1:8000) that automatically rebuilds and refreshes when you make changes to the documentation.
uv run --group docs sphinx-autobuild docs/source docs/build/html --write-all
```

### Update API Documentation

If you modify or add new API endpoints, update the API documentation accordingly. You can do this by running the following command:

```bash
uv run ./docs/openapi_generator/run_openapi_generator.sh
```

The generated API documentation will be available in `docs/_static/`. Make sure to review the changes before committing.

## License
By contributing to Llama, you agree that your contributions will be licensed
under the LICENSE file in the root directory of this source tree.
22
LICENSE
@@ -1,22 +0,0 @@
MIT License

Copyright (c) Meta Platforms, Inc. and affiliates

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
9
MANIFEST.in
@@ -1,9 +0,0 @@
include pyproject.toml
include llama_stack/models/llama/llama3/tokenizer.model
include llama_stack/models/llama/llama4/tokenizer.model
include llama_stack/distribution/*.sh
include llama_stack/cli/scripts/*.sh
include llama_stack/templates/*/*.yaml
include llama_stack/providers/tests/test_cases/inference/*.json
include llama_stack/models/llama/*/*.md
include llama_stack/tests/integration/*.jpg
177
README.md
|
|
@ -1,177 +0,0 @@
|
||||||
# Llama Stack
|
|
||||||
|
|
||||||
[](https://pypi.org/project/llama_stack/)
|
|
||||||
[](https://pypi.org/project/llama-stack/)
|
|
||||||
[](https://github.com/meta-llama/llama-stack/blob/main/LICENSE)
|
|
||||||
[](https://discord.gg/llama-stack)
|
|
||||||
[](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain)
|
|
||||||
[](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain)
|
|
||||||
|
|
||||||
[**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
|
|
||||||
|
|
||||||
### ✨🎉 Llama 4 Support 🎉✨
|
|
||||||
We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta.
|
|
||||||
|
|
||||||
<details>
|
|
||||||
|
|
||||||
<summary>👋 Click here to see how to run Llama 4 models on Llama Stack </summary>
|
|
||||||
|
|
||||||
\
|
|
||||||
*Note you need 8xH100 GPU-host to run these models*
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install -U llama_stack
|
|
||||||
|
|
||||||
MODEL="Llama-4-Scout-17B-16E-Instruct"
|
|
||||||
# get meta url from llama.com
|
|
||||||
llama model download --source meta --model-id $MODEL --meta-url <META_URL>
|
|
||||||
|
|
||||||
# start a llama stack server
|
|
||||||
INFERENCE_MODEL=meta-llama/$MODEL llama stack build --run --template meta-reference-gpu
|
|
||||||
|
|
||||||
# install client to interact with the server
|
|
||||||
pip install llama-stack-client
|
|
||||||
```
|
|
||||||
### CLI
|
|
||||||
```bash
|
|
||||||
# Run a chat completion
|
|
||||||
llama-stack-client --endpoint http://localhost:8321 \
|
|
||||||
inference chat-completion \
|
|
||||||
--model-id meta-llama/$MODEL \
|
|
||||||
--message "write a haiku for meta's llama 4 models"
|
|
||||||
|
|
||||||
ChatCompletionResponse(
|
|
||||||
completion_message=CompletionMessage(content="Whispers in code born\nLlama's gentle, wise heartbeat\nFuture's soft unfold", role='assistant', stop_reason='end_of_turn', tool_calls=[]),
|
|
||||||
logprobs=None,
|
|
||||||
metrics=[Metric(metric='prompt_tokens', value=21.0, unit=None), Metric(metric='completion_tokens', value=28.0, unit=None), Metric(metric='total_tokens', value=49.0, unit=None)]
|
|
||||||
)
|
|
||||||
```
|
|
||||||
### Python SDK
|
|
||||||
```python
|
|
||||||
from llama_stack_client import LlamaStackClient
|
|
||||||
|
|
||||||
client = LlamaStackClient(base_url=f"http://localhost:8321")
|
|
||||||
|
|
||||||
model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
|
|
||||||
prompt = "Write a haiku about coding"
|
|
||||||
|
|
||||||
print(f"User> {prompt}")
|
|
||||||
response = client.inference.chat_completion(
|
|
||||||
model_id=model_id,
|
|
||||||
messages=[
|
|
||||||
{"role": "system", "content": "You are a helpful assistant."},
|
|
||||||
{"role": "user", "content": prompt},
|
|
||||||
],
|
|
||||||
)
|
|
||||||
print(f"Assistant> {response.completion_message.content}")
|
|
||||||
```
|
|
||||||
As more providers start supporting Llama 4, you can use them in Llama Stack as well. We are adding to the list. Stay tuned!
|
|
||||||
|
|
||||||
|
|
||||||
</details>
|
|
||||||
|
|
||||||
### 🚀 One-Line Installer 🚀
|
|
||||||
|
|
||||||
To try Llama Stack locally, run:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl -LsSf https://github.com/meta-llama/llama-stack/raw/main/install.sh | sh
|
|
||||||
```
|
|
||||||
|
|
||||||
### Overview
|
|
||||||
|
|
||||||
Llama Stack standardizes the core building blocks that simplify AI application development. It codifies best practices across the Llama ecosystem. More specifically, it provides
|
|
||||||
|
|
||||||
- **Unified API layer** for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry.
|
|
||||||
- **Plugin architecture** to support the rich ecosystem of different API implementations in various environments, including local development, on-premises, cloud, and mobile.
|
|
||||||
- **Prepackaged verified distributions** which offer a one-stop solution for developers to get started quickly and reliably in any environment.
|
|
||||||
- **Multiple developer interfaces** like CLI and SDKs for Python, Typescript, iOS, and Android.
|
|
||||||
- **Standalone applications** as examples for how to build production-grade AI applications with Llama Stack.
|
|
||||||
|
|
||||||
<div style="text-align: center;">
|
|
||||||
<img
|
|
||||||
src="https://github.com/user-attachments/assets/33d9576d-95ea-468d-95e2-8fa233205a50"
|
|
||||||
width="480"
|
|
||||||
title="Llama Stack"
|
|
||||||
alt="Llama Stack"
|
|
||||||
/>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
### Llama Stack Benefits
|
|
||||||
- **Flexible Options**: Developers can choose their preferred infrastructure without changing APIs and enjoy flexible deployment choices.
|
|
||||||
- **Consistent Experience**: With its unified APIs, Llama Stack makes it easier to build, test, and deploy AI applications with consistent application behavior.
|
|
||||||
- **Robust Ecosystem**: Llama Stack is already integrated with distribution partners (cloud providers, hardware vendors, and AI-focused companies) that offer tailored infrastructure, software, and services for deploying Llama models.
|
|
||||||
|
|
||||||
By reducing friction and complexity, Llama Stack empowers developers to focus on what they do best: building transformative generative AI applications.
|
|
||||||
|
|
||||||
### API Providers
|
|
||||||
Here is a list of the various API providers and available distributions that can help developers get started easily with Llama Stack.
|
|
||||||
|
|
||||||
| **API Provider Builder** | **Environments** | **Agents** | **Inference** | **Memory** | **Safety** | **Telemetry** | **Post Training** |
|
|
||||||
|:------------------------:|:----------------------:|:----------:|:-------------:|:----------:|:----------:|:-------------:|:-----------------:|
|
|
||||||
| Meta Reference | Single Node | ✅ | ✅ | ✅ | ✅ | ✅ | |
|
|
||||||
| SambaNova | Hosted | | ✅ | | ✅ | | |
|
|
||||||
| Cerebras | Hosted | | ✅ | | | | |
|
|
||||||
| Fireworks | Hosted | ✅ | ✅ | ✅ | | | |
|
|
||||||
| AWS Bedrock | Hosted | | ✅ | | ✅ | | |
|
|
||||||
| Together | Hosted | ✅ | ✅ | | ✅ | | |
|
|
||||||
| Groq | Hosted | | ✅ | | | | |
|
|
||||||
| Ollama | Single Node | | ✅ | | | | |
|
|
||||||
| TGI | Hosted and Single Node | | ✅ | | | | |
|
|
||||||
| NVIDIA NIM | Hosted and Single Node | | ✅ | | | | |
|
|
||||||
| Chroma | Single Node | | | ✅ | | | |
|
|
||||||
| PG Vector | Single Node | | | ✅ | | | |
|
|
||||||
| PyTorch ExecuTorch | On-device iOS | ✅ | ✅ | | | | |
|
|
||||||
| vLLM | Hosted and Single Node | | ✅ | | | | |
|
|
||||||
| OpenAI | Hosted | | ✅ | | | | |
|
|
||||||
| Anthropic | Hosted | | ✅ | | | | |
|
|
||||||
| Gemini | Hosted | | ✅ | | | | |
|
|
||||||
| watsonx | Hosted | | ✅ | | | | |
|
|
||||||
| HuggingFace | Single Node | | | | | | ✅ |
|
|
||||||
| TorchTune | Single Node | | | | | | ✅ |
|
|
||||||
| NVIDIA NEMO | Hosted | | | | | | ✅ |
|
|
||||||
|
|
||||||
|
|
||||||
### Distributions
|
|
||||||
|
|
||||||
A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider implementations for each API component. Distributions make it easy to get started with a specific deployment scenario - you can begin with a local development setup (eg. ollama) and seamlessly transition to production (eg. Fireworks) without changing your application code. Here are some of the distributions we support:
|
|
||||||
|
|
||||||
| **Distribution** | **Llama Stack Docker** | Start This Distribution |
|
|
||||||
|:---------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------:|
|
|
||||||
| Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/meta-reference-gpu.html) |
|
|
||||||
| SambaNova | [llamastack/distribution-sambanova](https://hub.docker.com/repository/docker/llamastack/distribution-sambanova/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/sambanova.html) |
|
|
||||||
| Cerebras | [llamastack/distribution-cerebras](https://hub.docker.com/repository/docker/llamastack/distribution-cerebras/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/cerebras.html) |
|
|
||||||
| Ollama | [llamastack/distribution-ollama](https://hub.docker.com/repository/docker/llamastack/distribution-ollama/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/ollama.html) |
|
|
||||||
| TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/tgi.html) |
|
|
||||||
| Together | [llamastack/distribution-together](https://hub.docker.com/repository/docker/llamastack/distribution-together/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/together.html) |
|
|
||||||
| Fireworks | [llamastack/distribution-fireworks](https://hub.docker.com/repository/docker/llamastack/distribution-fireworks/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/fireworks.html) |
|
|
||||||
| vLLM | [llamastack/distribution-remote-vllm](https://hub.docker.com/repository/docker/llamastack/distribution-remote-vllm/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/remote-vllm.html) |
|
|
||||||
|
|
||||||
|
|
||||||
### Documentation
|
|
||||||
|
|
||||||
Please checkout our [Documentation](https://llama-stack.readthedocs.io/en/latest/index.html) page for more details.
|
|
||||||
|
|
||||||
* CLI references
|
|
||||||
* [llama (server-side) CLI Reference](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/index.html): Guide for using the `llama` CLI to work with Llama models (download, study prompts), and building/starting a Llama Stack distribution.
|
|
||||||
* [llama (client-side) CLI Reference](https://llama-stack.readthedocs.io/en/latest/references/llama_stack_client_cli_reference.html): Guide for using the `llama-stack-client` CLI, which allows you to query information about the distribution.
|
|
||||||
* Getting Started
|
|
||||||
* [Quick guide to start a Llama Stack server](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html).
|
|
||||||
* [Jupyter notebook](./docs/getting_started.ipynb) to walk-through how to use simple text and vision inference llama_stack_client APIs
|
|
||||||
* The complete Llama Stack lesson [Colab notebook](https://colab.research.google.com/drive/1dtVmxotBsI4cGZQNsJRYPrLiDeT0Wnwt) of the new [Llama 3.2 course on Deeplearning.ai](https://learn.deeplearning.ai/courses/introducing-multimodal-llama-3-2/lesson/8/llama-stack).
|
|
||||||
* A [Zero-to-Hero Guide](https://github.com/meta-llama/llama-stack/tree/main/docs/zero_to_hero_guide) that guide you through all the key components of llama stack with code samples.
|
|
||||||
* [Contributing](CONTRIBUTING.md)
|
|
||||||
* [Adding a new API Provider](https://llama-stack.readthedocs.io/en/latest/contributing/new_api_provider.html) to walk-through how to add a new API provider.
|
|
||||||
|
|
||||||
### Llama Stack Client SDKs

| **Language** | **Client SDK** | **Package** |
| :----: | :----: | :----: |
| Python | [llama-stack-client-python](https://github.com/meta-llama/llama-stack-client-python) | [](https://pypi.org/project/llama_stack_client/) |
| Swift | [llama-stack-client-swift](https://github.com/meta-llama/llama-stack-client-swift) | [](https://swiftpackageindex.com/meta-llama/llama-stack-client-swift) |
| Typescript | [llama-stack-client-typescript](https://github.com/meta-llama/llama-stack-client-typescript) | [](https://npmjs.org/package/llama-stack-client) |
| Kotlin | [llama-stack-client-kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) | [](https://central.sonatype.com/artifact/com.llama.llamastack/llama-stack-client-kotlin) |

Check out our client SDKs for connecting to a Llama Stack server in your preferred language: you can choose from [python](https://github.com/meta-llama/llama-stack-client-python), [typescript](https://github.com/meta-llama/llama-stack-client-typescript), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) to quickly build your applications.

You can find more example scripts that use the client SDKs to talk to a Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo.
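
As a quick illustration, here is a minimal connection sketch using the Python SDK. The server URL and the listing call are assumptions about your running distribution; adjust them to your setup.

```python
# Minimal sketch, assuming a Llama Stack server is already running and that the
# Python SDK is installed (`pip install llama-stack-client`).
from llama_stack_client import LlamaStackClient

# The port is an assumption; use whatever your distribution is configured to serve on.
client = LlamaStackClient(base_url="http://localhost:8321")

# List the models the distribution exposes.
for model in client.models.list():
    print(model)
```
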
@ -1,5 +0,0 @@
# Security Policy

## Reporting a Vulnerability

Please report vulnerabilities to our bug bounty program at https://bugbounty.meta.com/

@ -1,20 +0,0 @@
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

35 docs/_static/css/my_theme.css vendored
@ -1,35 +0,0 @@
@import url("theme.css");

.wy-nav-content {
    max-width: 90%;
}

.wy-nav-side {
    /* background: linear-gradient(45deg, #2980B9, #16A085); */
    background: linear-gradient(90deg, #332735, #1b263c);
}

.wy-side-nav-search {
    background-color: transparent !important;
}

.hide-title h1 {
    display: none;
}

h2, h3, h4 {
    font-weight: normal;
}
html[data-theme="dark"] .rst-content div[class^="highlight"] {
    background-color: #0b0b0b;
}
pre {
    white-space: pre-wrap !important;
    word-break: break-all;
}

[data-theme="dark"] .mermaid {
    background-color: #f4f4f6 !important;
    border-radius: 6px;
    padding: 0.5em;
}

32 docs/_static/js/detect_theme.js vendored
@ -1,32 +0,0 @@
document.addEventListener("DOMContentLoaded", function () {
  const prefersDark = window.matchMedia("(prefers-color-scheme: dark)").matches;
  const htmlElement = document.documentElement;

  // Check if theme is saved in localStorage
  const savedTheme = localStorage.getItem("sphinx-rtd-theme");

  if (savedTheme) {
    // Use the saved theme preference
    htmlElement.setAttribute("data-theme", savedTheme);
    document.body.classList.toggle("dark", savedTheme === "dark");
  } else {
    // Fall back to system preference
    const theme = prefersDark ? "dark" : "light";
    htmlElement.setAttribute("data-theme", theme);
    document.body.classList.toggle("dark", theme === "dark");
    // Save initial preference
    localStorage.setItem("sphinx-rtd-theme", theme);
  }

  // Listen for theme changes from the existing toggle
  const observer = new MutationObserver(function(mutations) {
    mutations.forEach(function(mutation) {
      if (mutation.attributeName === "data-theme") {
        const currentTheme = htmlElement.getAttribute("data-theme");
        localStorage.setItem("sphinx-rtd-theme", currentTheme);
      }
    });
  });

  observer.observe(htmlElement, { attributes: true });
});

BIN docs/_static/llama-stack-logo.png vendored | Before: 70 KiB
14345 docs/_static/llama-stack-spec.html vendored
10042 docs/_static/llama-stack-spec.yaml vendored
BIN docs/_static/llama-stack.png vendored | Before: 196 KiB
Before: 33 KiB
Before: 37 KiB
Before: 56 KiB
BIN docs/_static/remote_or_local.gif vendored | Before: 204 KiB
BIN docs/_static/safety_system.webp vendored | Before: 31 KiB
@ -1,24 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import os
import time


def pytest_collection_modifyitems(items):
    for item in items:
        item.name = item.name.replace(' ', '_')


def pytest_runtest_teardown(item):
    interval_seconds = os.getenv("LLAMA_STACK_TEST_INTERVAL_SECONDS")
    if interval_seconds:
        time.sleep(float(interval_seconds))


def pytest_configure(config):
    config.option.tbstyle = "short"
    config.option.disable_warnings = True

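As a usage note for the teardown hook above, here is a hedged sketch of throttling a pytest run via the `LLAMA_STACK_TEST_INTERVAL_SECONDS` environment variable; the `tests/` path is an assumption.

```python
# Hedged sketch: pause 0.5s after every test by driving the teardown hook above.
import os
import subprocess

env = dict(os.environ, LLAMA_STACK_TEST_INTERVAL_SECONDS="0.5")
subprocess.run(["pytest", "tests/"], env=env, check=False)  # "tests/" is an assumed path
```
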
@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

sphinx-autobuild --write-all source build/html --watch source/

BIN docs/dog.jpg | Before: 39 KiB

@ -1,5 +0,0 @@
Copyright (c) Meta Platforms, Inc. and affiliates.
All rights reserved.

This source code is licensed under the terms described in the LICENSE file in
the root directory of this source tree.

@ -1,35 +0,0 @@
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.https://www.sphinx-doc.org/
	exit /b 1
)

if "%1" == "" goto help

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd

@ -1 +0,0 @@
The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack/distribution/server/endpoints.py` using the `generate.py` utility.

@ -1,91 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms found in the
# LICENSE file in the root directory of this source tree.

from datetime import datetime
from pathlib import Path
import sys
import fire
import ruamel.yaml as yaml

from llama_stack.apis.version import LLAMA_STACK_API_VERSION  # noqa: E402
from llama_stack.distribution.stack import LlamaStack  # noqa: E402

from .pyopenapi.options import Options  # noqa: E402
from .pyopenapi.specification import Info, Server  # noqa: E402
from .pyopenapi.utility import Specification, validate_api  # noqa: E402


def str_presenter(dumper, data):
    if data.startswith(f"/{LLAMA_STACK_API_VERSION}") or data.startswith(
        "#/components/schemas/"
    ):
        style = None
    else:
        style = ">" if "\n" in data or len(data) > 40 else None
    return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=style)


def main(output_dir: str):
    output_dir = Path(output_dir)
    if not output_dir.exists():
        raise ValueError(f"Directory {output_dir} does not exist")

    # Validate API protocols before generating spec
    return_type_errors = validate_api()
    if return_type_errors:
        print("\nAPI Method Return Type Validation Errors:\n")
        for error in return_type_errors:
            print(error, file=sys.stderr)
        sys.exit(1)
    now = str(datetime.now())
    print(
        "Converting the spec to YAML (openapi.yaml) and HTML (openapi.html) at " + now
    )
    print("")

    spec = Specification(
        LlamaStack,
        Options(
            server=Server(url="http://any-hosted-llama-stack.com"),
            info=Info(
                title="Llama Stack Specification",
                version=LLAMA_STACK_API_VERSION,
                description="""This is the specification of the Llama Stack that provides
                a set of endpoints and their corresponding interfaces that are tailored to
                best leverage Llama Models.""",
            ),
            include_standard_error_responses=True,
        ),
    )

    with open(output_dir / "llama-stack-spec.yaml", "w", encoding="utf-8") as fp:
        y = yaml.YAML()
        y.default_flow_style = False
        y.block_seq_indent = 2
        y.map_indent = 2
        y.sequence_indent = 4
        y.sequence_dash_offset = 2
        y.width = 80
        y.allow_unicode = True
        y.representer.add_representer(str, str_presenter)

        y.dump(
            spec.get_json(),
            fp,
        )

    with open(output_dir / "llama-stack-spec.html", "w") as fp:
        spec.write_html(fp, pretty_print=True)


if __name__ == "__main__":
    fire.Fire(main)

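Relating back to the RFC-generation note above, here is a hedged sketch of invoking this generator directly rather than through the `fire.Fire(main)` entry point; the module path and output directory are assumptions.

```python
# Hedged sketch: call main() directly; the import path is inferred from the
# relative ".pyopenapi" imports above and may differ in practice.
from docs.openapi_generator.generate import main

main("docs/_static")  # writes llama-stack-spec.yaml and llama-stack-spec.html there
```
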
@ -1 +0,0 @@
This is forked from https://github.com/hunyadi/pyopenapi

@ -1,5 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

@ -1,938 +0,0 @@
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
|
|
||||||
import hashlib
|
|
||||||
import ipaddress
|
|
||||||
import types
|
|
||||||
import typing
|
|
||||||
from dataclasses import make_dataclass
|
|
||||||
from typing import Any, Dict, Set, Union
|
|
||||||
|
|
||||||
from llama_stack.apis.datatypes import Error
|
|
||||||
from llama_stack.strong_typing.core import JsonType
|
|
||||||
from llama_stack.strong_typing.docstring import Docstring, parse_type
|
|
||||||
from llama_stack.strong_typing.inspection import (
|
|
||||||
is_generic_list,
|
|
||||||
is_type_optional,
|
|
||||||
is_type_union,
|
|
||||||
unwrap_generic_list,
|
|
||||||
unwrap_optional_type,
|
|
||||||
unwrap_union_types,
|
|
||||||
)
|
|
||||||
from llama_stack.strong_typing.name import python_type_to_name
|
|
||||||
from llama_stack.strong_typing.schema import (
|
|
||||||
get_schema_identifier,
|
|
||||||
JsonSchemaGenerator,
|
|
||||||
register_schema,
|
|
||||||
Schema,
|
|
||||||
SchemaOptions,
|
|
||||||
)
|
|
||||||
from typing import get_origin, get_args
|
|
||||||
from typing import Annotated
|
|
||||||
from fastapi import UploadFile
|
|
||||||
from llama_stack.strong_typing.serialization import json_dump_string, object_to_json
|
|
||||||
|
|
||||||
from .operations import (
|
|
||||||
EndpointOperation,
|
|
||||||
get_endpoint_events,
|
|
||||||
get_endpoint_operations,
|
|
||||||
HTTPMethod,
|
|
||||||
)
|
|
||||||
from .options import *
|
|
||||||
from .specification import (
|
|
||||||
Components,
|
|
||||||
Document,
|
|
||||||
Example,
|
|
||||||
ExampleRef,
|
|
||||||
MediaType,
|
|
||||||
Operation,
|
|
||||||
Parameter,
|
|
||||||
ParameterLocation,
|
|
||||||
PathItem,
|
|
||||||
RequestBody,
|
|
||||||
Response,
|
|
||||||
ResponseRef,
|
|
||||||
SchemaOrRef,
|
|
||||||
SchemaRef,
|
|
||||||
Tag,
|
|
||||||
TagGroup,
|
|
||||||
)
|
|
||||||
|
|
||||||
register_schema(
|
|
||||||
ipaddress.IPv4Address,
|
|
||||||
schema={
|
|
||||||
"type": "string",
|
|
||||||
"format": "ipv4",
|
|
||||||
"title": "IPv4 address",
|
|
||||||
"description": "IPv4 address, according to dotted-quad ABNF syntax as defined in RFC 2673, section 3.2.",
|
|
||||||
},
|
|
||||||
examples=["192.0.2.0", "198.51.100.1", "203.0.113.255"],
|
|
||||||
)
|
|
||||||
|
|
||||||
register_schema(
|
|
||||||
ipaddress.IPv6Address,
|
|
||||||
schema={
|
|
||||||
"type": "string",
|
|
||||||
"format": "ipv6",
|
|
||||||
"title": "IPv6 address",
|
|
||||||
"description": "IPv6 address, as defined in RFC 2373, section 2.2.",
|
|
||||||
},
|
|
||||||
examples=[
|
|
||||||
"FEDC:BA98:7654:3210:FEDC:BA98:7654:3210",
|
|
||||||
"1080:0:0:0:8:800:200C:417A",
|
|
||||||
"1080::8:800:200C:417A",
|
|
||||||
"FF01::101",
|
|
||||||
"::1",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def http_status_to_string(status_code: HTTPStatusCode) -> str:
|
|
||||||
"Converts an HTTP status code to a string."
|
|
||||||
|
|
||||||
if isinstance(status_code, HTTPStatus):
|
|
||||||
return str(status_code.value)
|
|
||||||
elif isinstance(status_code, int):
|
|
||||||
return str(status_code)
|
|
||||||
elif isinstance(status_code, str):
|
|
||||||
return status_code
|
|
||||||
else:
|
|
||||||
raise TypeError("expected: HTTP status code")
|
|
||||||
|
|
||||||
|
|
||||||
class SchemaBuilder:
|
|
||||||
schema_generator: JsonSchemaGenerator
|
|
||||||
schemas: Dict[str, Schema]
|
|
||||||
|
|
||||||
def __init__(self, schema_generator: JsonSchemaGenerator) -> None:
|
|
||||||
self.schema_generator = schema_generator
|
|
||||||
self.schemas = {}
|
|
||||||
|
|
||||||
def classdef_to_schema(self, typ: type) -> Schema:
|
|
||||||
"""
|
|
||||||
Converts a type to a JSON schema.
|
|
||||||
For nested types found in the type hierarchy, adds the type to the schema registry in the OpenAPI specification section `components`.
|
|
||||||
"""
|
|
||||||
|
|
||||||
type_schema, type_definitions = self.schema_generator.classdef_to_schema(typ)
|
|
||||||
|
|
||||||
# append schema to list of known schemas, to be used in OpenAPI's Components Object section
|
|
||||||
for ref, schema in type_definitions.items():
|
|
||||||
self._add_ref(ref, schema)
|
|
||||||
|
|
||||||
return type_schema
|
|
||||||
|
|
||||||
def classdef_to_named_schema(self, name: str, typ: type) -> Schema:
|
|
||||||
schema = self.classdef_to_schema(typ)
|
|
||||||
self._add_ref(name, schema)
|
|
||||||
return schema
|
|
||||||
|
|
||||||
def classdef_to_ref(self, typ: type) -> SchemaOrRef:
|
|
||||||
"""
|
|
||||||
Converts a type to a JSON schema, and if possible, returns a schema reference.
|
|
||||||
For composite types (such as classes), adds the type to the schema registry in the OpenAPI specification section `components`.
|
|
||||||
"""
|
|
||||||
|
|
||||||
type_schema = self.classdef_to_schema(typ)
|
|
||||||
if typ is str or typ is int or typ is float:
|
|
||||||
# represent simple types as themselves
|
|
||||||
return type_schema
|
|
||||||
|
|
||||||
type_name = get_schema_identifier(typ)
|
|
||||||
if type_name is not None:
|
|
||||||
return self._build_ref(type_name, type_schema)
|
|
||||||
|
|
||||||
try:
|
|
||||||
type_name = python_type_to_name(typ)
|
|
||||||
return self._build_ref(type_name, type_schema)
|
|
||||||
except TypeError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return type_schema
|
|
||||||
|
|
||||||
def _build_ref(self, type_name: str, type_schema: Schema) -> SchemaRef:
|
|
||||||
self._add_ref(type_name, type_schema)
|
|
||||||
return SchemaRef(type_name)
|
|
||||||
|
|
||||||
def _add_ref(self, type_name: str, type_schema: Schema) -> None:
|
|
||||||
if type_name not in self.schemas:
|
|
||||||
self.schemas[type_name] = type_schema
|
|
||||||
|
|
||||||
|
|
||||||
class ContentBuilder:
|
|
||||||
schema_builder: SchemaBuilder
|
|
||||||
schema_transformer: Optional[Callable[[SchemaOrRef], SchemaOrRef]]
|
|
||||||
sample_transformer: Optional[Callable[[JsonType], JsonType]]
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
schema_builder: SchemaBuilder,
|
|
||||||
schema_transformer: Optional[Callable[[SchemaOrRef], SchemaOrRef]] = None,
|
|
||||||
sample_transformer: Optional[Callable[[JsonType], JsonType]] = None,
|
|
||||||
) -> None:
|
|
||||||
self.schema_builder = schema_builder
|
|
||||||
self.schema_transformer = schema_transformer
|
|
||||||
self.sample_transformer = sample_transformer
|
|
||||||
|
|
||||||
def build_content(
|
|
||||||
self, payload_type: type, examples: Optional[List[Any]] = None
|
|
||||||
) -> Dict[str, MediaType]:
|
|
||||||
"Creates the content subtree for a request or response."
|
|
||||||
|
|
||||||
def is_iterator_type(t):
|
|
||||||
return "StreamChunk" in str(t) or "OpenAIResponseObjectStream" in str(t)
|
|
||||||
|
|
||||||
def get_media_type(t):
|
|
||||||
if is_generic_list(t):
|
|
||||||
return "application/jsonl"
|
|
||||||
elif is_iterator_type(t):
|
|
||||||
return "text/event-stream"
|
|
||||||
else:
|
|
||||||
return "application/json"
|
|
||||||
|
|
||||||
if typing.get_origin(payload_type) in (typing.Union, types.UnionType):
|
|
||||||
media_types = []
|
|
||||||
item_types = []
|
|
||||||
for x in typing.get_args(payload_type):
|
|
||||||
media_types.append(get_media_type(x))
|
|
||||||
item_types.append(x)
|
|
||||||
|
|
||||||
if len(set(media_types)) == 1:
|
|
||||||
# all types have the same media type
|
|
||||||
return {media_types[0]: self.build_media_type(payload_type, examples)}
|
|
||||||
else:
|
|
||||||
# different types have different media types
|
|
||||||
return {
|
|
||||||
media_type: self.build_media_type(item_type, examples)
|
|
||||||
for media_type, item_type in zip(media_types, item_types)
|
|
||||||
}
|
|
||||||
|
|
||||||
if is_generic_list(payload_type):
|
|
||||||
media_type = "application/jsonl"
|
|
||||||
item_type = unwrap_generic_list(payload_type)
|
|
||||||
else:
|
|
||||||
media_type = "application/json"
|
|
||||||
item_type = payload_type
|
|
||||||
|
|
||||||
return {media_type: self.build_media_type(item_type, examples)}
|
|
||||||
|
|
||||||
def build_media_type(
|
|
||||||
self, item_type: type, examples: Optional[List[Any]] = None
|
|
||||||
) -> MediaType:
|
|
||||||
schema = self.schema_builder.classdef_to_ref(item_type)
|
|
||||||
if self.schema_transformer:
|
|
||||||
schema_transformer: Callable[[SchemaOrRef], SchemaOrRef] = (
|
|
||||||
self.schema_transformer
|
|
||||||
)
|
|
||||||
schema = schema_transformer(schema)
|
|
||||||
|
|
||||||
if not examples:
|
|
||||||
return MediaType(schema=schema)
|
|
||||||
|
|
||||||
if len(examples) == 1:
|
|
||||||
return MediaType(schema=schema, example=self._build_example(examples[0]))
|
|
||||||
|
|
||||||
return MediaType(
|
|
||||||
schema=schema,
|
|
||||||
examples=self._build_examples(examples),
|
|
||||||
)
|
|
||||||
|
|
||||||
def _build_examples(
|
|
||||||
self, examples: List[Any]
|
|
||||||
) -> Dict[str, Union[Example, ExampleRef]]:
|
|
||||||
"Creates a set of several examples for a media type."
|
|
||||||
|
|
||||||
if self.sample_transformer:
|
|
||||||
sample_transformer: Callable[[JsonType], JsonType] = self.sample_transformer # type: ignore
|
|
||||||
else:
|
|
||||||
sample_transformer = lambda sample: sample
|
|
||||||
|
|
||||||
results: Dict[str, Union[Example, ExampleRef]] = {}
|
|
||||||
for example in examples:
|
|
||||||
value = sample_transformer(object_to_json(example))
|
|
||||||
|
|
||||||
hash_string = (
|
|
||||||
hashlib.sha256(json_dump_string(value).encode("utf-8"))
|
|
||||||
.digest()
|
|
||||||
.hex()[:16]
|
|
||||||
)
|
|
||||||
name = f"ex-{hash_string}"
|
|
||||||
|
|
||||||
results[name] = Example(value=value)
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
def _build_example(self, example: Any) -> Any:
|
|
||||||
"Creates a single example for a media type."
|
|
||||||
|
|
||||||
if self.sample_transformer:
|
|
||||||
sample_transformer: Callable[[JsonType], JsonType] = self.sample_transformer # type: ignore
|
|
||||||
else:
|
|
||||||
sample_transformer = lambda sample: sample
|
|
||||||
|
|
||||||
return sample_transformer(object_to_json(example))
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class ResponseOptions:
|
|
||||||
"""
|
|
||||||
Configuration options for building a response for an operation.
|
|
||||||
|
|
||||||
:param type_descriptions: Maps each response type to a textual description (if available).
|
|
||||||
:param examples: A list of response examples.
|
|
||||||
:param status_catalog: Maps each response type to an HTTP status code.
|
|
||||||
:param default_status_code: HTTP status code assigned to responses that have no mapping.
|
|
||||||
"""
|
|
||||||
|
|
||||||
type_descriptions: Dict[type, str]
|
|
||||||
examples: Optional[List[Any]]
|
|
||||||
status_catalog: Dict[type, HTTPStatusCode]
|
|
||||||
default_status_code: HTTPStatusCode
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class StatusResponse:
|
|
||||||
status_code: str
|
|
||||||
types: List[type] = dataclasses.field(default_factory=list)
|
|
||||||
examples: List[Any] = dataclasses.field(default_factory=list)
|
|
||||||
|
|
||||||
|
|
||||||
def create_docstring_for_request(
|
|
||||||
request_name: str, fields: List[Tuple[str, type, Any]], doc_params: Dict[str, str]
|
|
||||||
) -> str:
|
|
||||||
"""Creates a ReST-style docstring for a dynamically generated request dataclass."""
|
|
||||||
lines = ["\n"] # Short description
|
|
||||||
|
|
||||||
# Add parameter documentation in ReST format
|
|
||||||
for name, type_ in fields:
|
|
||||||
desc = doc_params.get(name, "")
|
|
||||||
lines.append(f":param {name}: {desc}")
|
|
||||||
|
|
||||||
return "\n".join(lines)
|
|
||||||
|
|
||||||
|
|
||||||
class ResponseBuilder:
|
|
||||||
content_builder: ContentBuilder
|
|
||||||
|
|
||||||
def __init__(self, content_builder: ContentBuilder) -> None:
|
|
||||||
self.content_builder = content_builder
|
|
||||||
|
|
||||||
def _get_status_responses(
|
|
||||||
self, options: ResponseOptions
|
|
||||||
) -> Dict[str, StatusResponse]:
|
|
||||||
status_responses: Dict[str, StatusResponse] = {}
|
|
||||||
|
|
||||||
for response_type in options.type_descriptions.keys():
|
|
||||||
status_code = http_status_to_string(
|
|
||||||
options.status_catalog.get(response_type, options.default_status_code)
|
|
||||||
)
|
|
||||||
|
|
||||||
# look up response for status code
|
|
||||||
if status_code not in status_responses:
|
|
||||||
status_responses[status_code] = StatusResponse(status_code)
|
|
||||||
status_response = status_responses[status_code]
|
|
||||||
|
|
||||||
# append response types that are assigned the given status code
|
|
||||||
status_response.types.append(response_type)
|
|
||||||
|
|
||||||
# append examples that have the matching response type
|
|
||||||
if options.examples:
|
|
||||||
status_response.examples.extend(
|
|
||||||
example
|
|
||||||
for example in options.examples
|
|
||||||
if isinstance(example, response_type)
|
|
||||||
)
|
|
||||||
|
|
||||||
return dict(sorted(status_responses.items()))
|
|
||||||
|
|
||||||
def build_response(
|
|
||||||
self, options: ResponseOptions
|
|
||||||
) -> Dict[str, Union[Response, ResponseRef]]:
|
|
||||||
"""
|
|
||||||
Groups responses that have the same status code.
|
|
||||||
"""
|
|
||||||
|
|
||||||
responses: Dict[str, Union[Response, ResponseRef]] = {}
|
|
||||||
status_responses = self._get_status_responses(options)
|
|
||||||
for status_code, status_response in status_responses.items():
|
|
||||||
response_types = tuple(status_response.types)
|
|
||||||
if len(response_types) > 1:
|
|
||||||
composite_response_type: type = Union[response_types] # type: ignore
|
|
||||||
else:
|
|
||||||
(response_type,) = response_types
|
|
||||||
composite_response_type = response_type
|
|
||||||
|
|
||||||
description = " **OR** ".join(
|
|
||||||
filter(
|
|
||||||
None,
|
|
||||||
(
|
|
||||||
options.type_descriptions[response_type]
|
|
||||||
for response_type in response_types
|
|
||||||
),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
responses[status_code] = self._build_response(
|
|
||||||
response_type=composite_response_type,
|
|
||||||
description=description,
|
|
||||||
examples=status_response.examples or None,
|
|
||||||
)
|
|
||||||
|
|
||||||
return responses
|
|
||||||
|
|
||||||
def _build_response(
|
|
||||||
self,
|
|
||||||
response_type: type,
|
|
||||||
description: str,
|
|
||||||
examples: Optional[List[Any]] = None,
|
|
||||||
) -> Response:
|
|
||||||
"Creates a response subtree."
|
|
||||||
|
|
||||||
if response_type is not None:
|
|
||||||
return Response(
|
|
||||||
description=description,
|
|
||||||
content=self.content_builder.build_content(response_type, examples),
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
return Response(description=description)
|
|
||||||
|
|
||||||
|
|
||||||
def schema_error_wrapper(schema: SchemaOrRef) -> Schema:
|
|
||||||
"Wraps an error output schema into a top-level error schema."
|
|
||||||
|
|
||||||
return {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"error": schema, # type: ignore
|
|
||||||
},
|
|
||||||
"additionalProperties": False,
|
|
||||||
"required": [
|
|
||||||
"error",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def sample_error_wrapper(error: JsonType) -> JsonType:
|
|
||||||
"Wraps an error output sample into a top-level error sample."
|
|
||||||
|
|
||||||
return {"error": error}
|
|
||||||
|
|
||||||
|
|
||||||
class Generator:
|
|
||||||
endpoint: type
|
|
||||||
options: Options
|
|
||||||
schema_builder: SchemaBuilder
|
|
||||||
responses: Dict[str, Response]
|
|
||||||
|
|
||||||
def __init__(self, endpoint: type, options: Options) -> None:
|
|
||||||
self.endpoint = endpoint
|
|
||||||
self.options = options
|
|
||||||
schema_generator = JsonSchemaGenerator(
|
|
||||||
SchemaOptions(
|
|
||||||
definitions_path="#/components/schemas/",
|
|
||||||
use_examples=self.options.use_examples,
|
|
||||||
property_description_fun=options.property_description_fun,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
self.schema_builder = SchemaBuilder(schema_generator)
|
|
||||||
self.responses = {}
|
|
||||||
|
|
||||||
# Create standard error responses
|
|
||||||
self._create_standard_error_responses()
|
|
||||||
|
|
||||||
def _create_standard_error_responses(self) -> None:
|
|
||||||
"""
|
|
||||||
Creates standard error responses that can be reused across operations.
|
|
||||||
These will be added to the components.responses section of the OpenAPI document.
|
|
||||||
"""
|
|
||||||
# Get the Error schema
|
|
||||||
error_schema = self.schema_builder.classdef_to_ref(Error)
|
|
||||||
|
|
||||||
# Create standard error responses
|
|
||||||
self.responses["BadRequest400"] = Response(
|
|
||||||
description="The request was invalid or malformed",
|
|
||||||
content={
|
|
||||||
"application/json": MediaType(
|
|
||||||
schema=error_schema,
|
|
||||||
example={
|
|
||||||
"status": 400,
|
|
||||||
"title": "Bad Request",
|
|
||||||
"detail": "The request was invalid or malformed",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
self.responses["TooManyRequests429"] = Response(
|
|
||||||
description="The client has sent too many requests in a given amount of time",
|
|
||||||
content={
|
|
||||||
"application/json": MediaType(
|
|
||||||
schema=error_schema,
|
|
||||||
example={
|
|
||||||
"status": 429,
|
|
||||||
"title": "Too Many Requests",
|
|
||||||
"detail": "You have exceeded the rate limit. Please try again later.",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
self.responses["InternalServerError500"] = Response(
|
|
||||||
description="The server encountered an unexpected error",
|
|
||||||
content={
|
|
||||||
"application/json": MediaType(
|
|
||||||
schema=error_schema,
|
|
||||||
example={
|
|
||||||
"status": 500,
|
|
||||||
"title": "Internal Server Error",
|
|
||||||
"detail": "An unexpected error occurred. Our team has been notified.",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
# Add a default error response for any unhandled error cases
|
|
||||||
self.responses["DefaultError"] = Response(
|
|
||||||
description="An unexpected error occurred",
|
|
||||||
content={
|
|
||||||
"application/json": MediaType(
|
|
||||||
schema=error_schema,
|
|
||||||
example={
|
|
||||||
"status": 0,
|
|
||||||
"title": "Error",
|
|
||||||
"detail": "An unexpected error occurred",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
def _build_type_tag(self, ref: str, schema: Schema) -> Tag:
|
|
||||||
# Don't include schema definition in the tag description because for one,
|
|
||||||
# it is not very valuable and for another, it causes string formatting
|
|
||||||
# discrepancies via the Stainless Studio.
|
|
||||||
#
|
|
||||||
# definition = f'<SchemaDefinition schemaRef="#/components/schemas/{ref}" />'
|
|
||||||
title = typing.cast(str, schema.get("title"))
|
|
||||||
description = typing.cast(str, schema.get("description"))
|
|
||||||
return Tag(
|
|
||||||
name=ref,
|
|
||||||
description="\n\n".join(s for s in (title, description) if s is not None),
|
|
||||||
)
|
|
||||||
|
|
||||||
def _build_extra_tag_groups(
|
|
||||||
self, extra_types: Dict[str, Dict[str, type]]
|
|
||||||
) -> Dict[str, List[Tag]]:
|
|
||||||
"""
|
|
||||||
Creates a dictionary of tag group captions as keys, and tag lists as values.
|
|
||||||
|
|
||||||
:param extra_types: A dictionary of type categories and list of types in that category.
|
|
||||||
"""
|
|
||||||
|
|
||||||
extra_tags: Dict[str, List[Tag]] = {}
|
|
||||||
|
|
||||||
for category_name, category_items in extra_types.items():
|
|
||||||
tag_list: List[Tag] = []
|
|
||||||
|
|
||||||
for name, extra_type in category_items.items():
|
|
||||||
schema = self.schema_builder.classdef_to_schema(extra_type)
|
|
||||||
tag_list.append(self._build_type_tag(name, schema))
|
|
||||||
|
|
||||||
if tag_list:
|
|
||||||
extra_tags[category_name] = tag_list
|
|
||||||
|
|
||||||
return extra_tags
|
|
||||||
|
|
||||||
def _build_operation(self, op: EndpointOperation) -> Operation:
|
|
||||||
if op.defining_class.__name__ in [
|
|
||||||
"SyntheticDataGeneration",
|
|
||||||
"PostTraining",
|
|
||||||
"BatchInference",
|
|
||||||
]:
|
|
||||||
op.defining_class.__name__ = f"{op.defining_class.__name__} (Coming Soon)"
|
|
||||||
print(op.defining_class.__name__)
|
|
||||||
|
|
||||||
# TODO (xiyan): temporary fix for datasetio inner impl + datasets api
|
|
||||||
# if op.defining_class.__name__ in ["DatasetIO"]:
|
|
||||||
# op.defining_class.__name__ = "Datasets"
|
|
||||||
|
|
||||||
doc_string = parse_type(op.func_ref)
|
|
||||||
doc_params = dict(
|
|
||||||
(param.name, param.description) for param in doc_string.params.values()
|
|
||||||
)
|
|
||||||
|
|
||||||
# parameters passed in URL component path
|
|
||||||
path_parameters = [
|
|
||||||
Parameter(
|
|
||||||
name=param_name,
|
|
||||||
in_=ParameterLocation.Path,
|
|
||||||
description=doc_params.get(param_name),
|
|
||||||
required=True,
|
|
||||||
schema=self.schema_builder.classdef_to_ref(param_type),
|
|
||||||
)
|
|
||||||
for param_name, param_type in op.path_params
|
|
||||||
]
|
|
||||||
|
|
||||||
# parameters passed in URL component query string
|
|
||||||
query_parameters = []
|
|
||||||
for param_name, param_type in op.query_params:
|
|
||||||
if is_type_optional(param_type):
|
|
||||||
inner_type: type = unwrap_optional_type(param_type)
|
|
||||||
required = False
|
|
||||||
else:
|
|
||||||
inner_type = param_type
|
|
||||||
required = True
|
|
||||||
|
|
||||||
query_parameter = Parameter(
|
|
||||||
name=param_name,
|
|
||||||
in_=ParameterLocation.Query,
|
|
||||||
description=doc_params.get(param_name),
|
|
||||||
required=required,
|
|
||||||
schema=self.schema_builder.classdef_to_ref(inner_type),
|
|
||||||
)
|
|
||||||
query_parameters.append(query_parameter)
|
|
||||||
|
|
||||||
# parameters passed anywhere
|
|
||||||
parameters = path_parameters + query_parameters
|
|
||||||
|
|
||||||
webmethod = getattr(op.func_ref, "__webmethod__", None)
|
|
||||||
raw_bytes_request_body = False
|
|
||||||
if webmethod:
|
|
||||||
raw_bytes_request_body = getattr(webmethod, "raw_bytes_request_body", False)
|
|
||||||
|
|
||||||
# data passed in request body as raw bytes cannot have request parameters
|
|
||||||
if raw_bytes_request_body and op.request_params:
|
|
||||||
raise ValueError(
|
|
||||||
"Cannot have both raw bytes request body and request parameters"
|
|
||||||
)
|
|
||||||
|
|
||||||
# data passed in request body as raw bytes
|
|
||||||
if raw_bytes_request_body:
|
|
||||||
requestBody = RequestBody(
|
|
||||||
content={
|
|
||||||
"application/octet-stream": {
|
|
||||||
"schema": {
|
|
||||||
"type": "string",
|
|
||||||
"format": "binary",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
required=True,
|
|
||||||
)
|
|
||||||
# data passed in request body as multipart/form-data
|
|
||||||
elif op.multipart_params:
|
|
||||||
builder = ContentBuilder(self.schema_builder)
|
|
||||||
|
|
||||||
# Create schema properties for multipart form fields
|
|
||||||
properties = {}
|
|
||||||
required_fields = []
|
|
||||||
|
|
||||||
for name, param_type in op.multipart_params:
|
|
||||||
if get_origin(param_type) is Annotated:
|
|
||||||
base_type = get_args(param_type)[0]
|
|
||||||
else:
|
|
||||||
base_type = param_type
|
|
||||||
if base_type is UploadFile:
|
|
||||||
# File upload
|
|
||||||
properties[name] = {
|
|
||||||
"type": "string",
|
|
||||||
"format": "binary"
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
# Form field
|
|
||||||
properties[name] = self.schema_builder.classdef_to_ref(base_type)
|
|
||||||
|
|
||||||
required_fields.append(name)
|
|
||||||
|
|
||||||
multipart_schema = {
|
|
||||||
"type": "object",
|
|
||||||
"properties": properties,
|
|
||||||
"required": required_fields
|
|
||||||
}
|
|
||||||
|
|
||||||
requestBody = RequestBody(
|
|
||||||
content={
|
|
||||||
"multipart/form-data": {
|
|
||||||
"schema": multipart_schema
|
|
||||||
}
|
|
||||||
},
|
|
||||||
required=True,
|
|
||||||
)
|
|
||||||
# data passed in payload as JSON and mapped to request parameters
|
|
||||||
elif op.request_params:
|
|
||||||
builder = ContentBuilder(self.schema_builder)
|
|
||||||
first = next(iter(op.request_params))
|
|
||||||
request_name, request_type = first
|
|
||||||
|
|
||||||
op_name = "".join(word.capitalize() for word in op.name.split("_"))
|
|
||||||
request_name = f"{op_name}Request"
|
|
||||||
fields = [
|
|
||||||
(
|
|
||||||
name,
|
|
||||||
type_,
|
|
||||||
)
|
|
||||||
for name, type_ in op.request_params
|
|
||||||
]
|
|
||||||
request_type = make_dataclass(
|
|
||||||
request_name,
|
|
||||||
fields,
|
|
||||||
namespace={
|
|
||||||
"__doc__": create_docstring_for_request(
|
|
||||||
request_name, fields, doc_params
|
|
||||||
)
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
requestBody = RequestBody(
|
|
||||||
content={
|
|
||||||
"application/json": builder.build_media_type(
|
|
||||||
request_type, op.request_examples
|
|
||||||
)
|
|
||||||
},
|
|
||||||
description=doc_params.get(request_name),
|
|
||||||
required=True,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
requestBody = None
|
|
||||||
|
|
||||||
# success response types
|
|
||||||
if doc_string.returns is None and is_type_union(op.response_type):
|
|
||||||
# split union of return types into a list of response types
|
|
||||||
success_type_docstring: Dict[type, Docstring] = {
|
|
||||||
typing.cast(type, item): parse_type(item)
|
|
||||||
for item in unwrap_union_types(op.response_type)
|
|
||||||
}
|
|
||||||
success_type_descriptions = {
|
|
||||||
item: doc_string.short_description
|
|
||||||
for item, doc_string in success_type_docstring.items()
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
# use return type as a single response type
|
|
||||||
success_type_descriptions = {
|
|
||||||
op.response_type: (
|
|
||||||
doc_string.returns.description if doc_string.returns else "OK"
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
response_examples = op.response_examples or []
|
|
||||||
success_examples = [
|
|
||||||
example
|
|
||||||
for example in response_examples
|
|
||||||
if not isinstance(example, Exception)
|
|
||||||
]
|
|
||||||
|
|
||||||
content_builder = ContentBuilder(self.schema_builder)
|
|
||||||
response_builder = ResponseBuilder(content_builder)
|
|
||||||
response_options = ResponseOptions(
|
|
||||||
success_type_descriptions,
|
|
||||||
success_examples if self.options.use_examples else None,
|
|
||||||
self.options.success_responses,
|
|
||||||
"200",
|
|
||||||
)
|
|
||||||
responses = response_builder.build_response(response_options)
|
|
||||||
|
|
||||||
# failure response types
|
|
||||||
if doc_string.raises:
|
|
||||||
exception_types: Dict[type, str] = {
|
|
||||||
item.raise_type: item.description for item in doc_string.raises.values()
|
|
||||||
}
|
|
||||||
exception_examples = [
|
|
||||||
example
|
|
||||||
for example in response_examples
|
|
||||||
if isinstance(example, Exception)
|
|
||||||
]
|
|
||||||
|
|
||||||
if self.options.error_wrapper:
|
|
||||||
schema_transformer = schema_error_wrapper
|
|
||||||
sample_transformer = sample_error_wrapper
|
|
||||||
else:
|
|
||||||
schema_transformer = None
|
|
||||||
sample_transformer = None
|
|
||||||
|
|
||||||
content_builder = ContentBuilder(
|
|
||||||
self.schema_builder,
|
|
||||||
schema_transformer=schema_transformer,
|
|
||||||
sample_transformer=sample_transformer,
|
|
||||||
)
|
|
||||||
response_builder = ResponseBuilder(content_builder)
|
|
||||||
response_options = ResponseOptions(
|
|
||||||
exception_types,
|
|
||||||
exception_examples if self.options.use_examples else None,
|
|
||||||
self.options.error_responses,
|
|
||||||
"500",
|
|
||||||
)
|
|
||||||
responses.update(response_builder.build_response(response_options))
|
|
||||||
|
|
||||||
assert len(responses.keys()) > 0, f"No responses found for {op.name}"
|
|
||||||
|
|
||||||
# Add standard error response references
|
|
||||||
if self.options.include_standard_error_responses:
|
|
||||||
if "400" not in responses:
|
|
||||||
responses["400"] = ResponseRef("BadRequest400")
|
|
||||||
if "429" not in responses:
|
|
||||||
responses["429"] = ResponseRef("TooManyRequests429")
|
|
||||||
if "500" not in responses:
|
|
||||||
responses["500"] = ResponseRef("InternalServerError500")
|
|
||||||
if "default" not in responses:
|
|
||||||
responses["default"] = ResponseRef("DefaultError")
|
|
||||||
|
|
||||||
if op.event_type is not None:
|
|
||||||
builder = ContentBuilder(self.schema_builder)
|
|
||||||
callbacks = {
|
|
||||||
f"{op.func_name}_callback": {
|
|
||||||
"{$request.query.callback}": PathItem(
|
|
||||||
post=Operation(
|
|
||||||
requestBody=RequestBody(
|
|
||||||
content=builder.build_content(op.event_type)
|
|
||||||
),
|
|
||||||
responses={"200": Response(description="OK")},
|
|
||||||
)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
else:
|
|
||||||
callbacks = None
|
|
||||||
|
|
||||||
description = "\n".join(
|
|
||||||
filter(None, [doc_string.short_description, doc_string.long_description])
|
|
||||||
)
|
|
||||||
|
|
||||||
return Operation(
|
|
||||||
tags=[getattr(op.defining_class, "API_NAMESPACE", op.defining_class.__name__)],
|
|
||||||
summary=None,
|
|
||||||
# summary=doc_string.short_description,
|
|
||||||
description=description,
|
|
||||||
parameters=parameters,
|
|
||||||
requestBody=requestBody,
|
|
||||||
responses=responses,
|
|
||||||
callbacks=callbacks,
|
|
||||||
deprecated=True if "DEPRECATED" in op.func_name else None,
|
|
||||||
security=[] if op.public else None,
|
|
||||||
)
|
|
||||||
|
|
||||||
def generate(self) -> Document:
|
|
||||||
paths: Dict[str, PathItem] = {}
|
|
||||||
endpoint_classes: Set[type] = set()
|
|
||||||
for op in get_endpoint_operations(
|
|
||||||
self.endpoint, use_examples=self.options.use_examples
|
|
||||||
):
|
|
||||||
endpoint_classes.add(op.defining_class)
|
|
||||||
|
|
||||||
operation = self._build_operation(op)
|
|
||||||
|
|
||||||
if op.http_method is HTTPMethod.GET:
|
|
||||||
pathItem = PathItem(get=operation)
|
|
||||||
elif op.http_method is HTTPMethod.PUT:
|
|
||||||
pathItem = PathItem(put=operation)
|
|
||||||
elif op.http_method is HTTPMethod.POST:
|
|
||||||
pathItem = PathItem(post=operation)
|
|
||||||
elif op.http_method is HTTPMethod.DELETE:
|
|
||||||
pathItem = PathItem(delete=operation)
|
|
||||||
elif op.http_method is HTTPMethod.PATCH:
|
|
||||||
pathItem = PathItem(patch=operation)
|
|
||||||
else:
|
|
||||||
raise NotImplementedError(f"unknown HTTP method: {op.http_method}")
|
|
||||||
|
|
||||||
route = op.get_route()
|
|
||||||
route = route.replace(":path", "")
|
|
||||||
print(f"route: {route}")
|
|
||||||
if route in paths:
|
|
||||||
paths[route].update(pathItem)
|
|
||||||
else:
|
|
||||||
paths[route] = pathItem
|
|
||||||
|
|
||||||
operation_tags: List[Tag] = []
|
|
||||||
for cls in endpoint_classes:
|
|
||||||
doc_string = parse_type(cls)
|
|
||||||
if hasattr(cls, "API_NAMESPACE") and cls.API_NAMESPACE != cls.__name__:
|
|
||||||
continue
|
|
||||||
operation_tags.append(
|
|
||||||
Tag(
|
|
||||||
name=cls.__name__,
|
|
||||||
description=doc_string.long_description,
|
|
||||||
displayName=doc_string.short_description,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
# types that are emitted by events
|
|
||||||
event_tags: List[Tag] = []
|
|
||||||
events = get_endpoint_events(self.endpoint)
|
|
||||||
for ref, event_type in events.items():
|
|
||||||
event_schema = self.schema_builder.classdef_to_named_schema(ref, event_type)
|
|
||||||
event_tags.append(self._build_type_tag(ref, event_schema))
|
|
||||||
|
|
||||||
# types that are explicitly declared
|
|
||||||
extra_tag_groups: Dict[str, List[Tag]] = {}
|
|
||||||
if self.options.extra_types is not None:
|
|
||||||
if isinstance(self.options.extra_types, list):
|
|
||||||
extra_tag_groups = self._build_extra_tag_groups(
|
|
||||||
{"AdditionalTypes": self.options.extra_types}
|
|
||||||
)
|
|
||||||
elif isinstance(self.options.extra_types, dict):
|
|
||||||
extra_tag_groups = self._build_extra_tag_groups(
|
|
||||||
self.options.extra_types
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
raise TypeError(
|
|
||||||
f"type mismatch for collection of extra types: {type(self.options.extra_types)}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# list all operations and types
|
|
||||||
tags: List[Tag] = []
|
|
||||||
tags.extend(operation_tags)
|
|
||||||
tags.extend(event_tags)
|
|
||||||
for extra_tag_group in extra_tag_groups.values():
|
|
||||||
tags.extend(extra_tag_group)
|
|
||||||
|
|
||||||
tags = sorted(tags, key=lambda t: t.name)
|
|
||||||
|
|
||||||
tag_groups = []
|
|
||||||
if operation_tags:
|
|
||||||
tag_groups.append(
|
|
||||||
TagGroup(
|
|
||||||
name=self.options.map("Operations"),
|
|
||||||
tags=sorted(tag.name for tag in operation_tags),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
if event_tags:
|
|
||||||
tag_groups.append(
|
|
||||||
TagGroup(
|
|
||||||
name=self.options.map("Events"),
|
|
||||||
tags=sorted(tag.name for tag in event_tags),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
for caption, extra_tag_group in extra_tag_groups.items():
|
|
||||||
tag_groups.append(
|
|
||||||
TagGroup(
|
|
||||||
name=caption,
|
|
||||||
tags=sorted(tag.name for tag in extra_tag_group),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
if self.options.default_security_scheme:
|
|
||||||
securitySchemes = {"Default": self.options.default_security_scheme}
|
|
||||||
else:
|
|
||||||
securitySchemes = None
|
|
||||||
|
|
||||||
return Document(
|
|
||||||
openapi=".".join(str(item) for item in self.options.version),
|
|
||||||
info=self.options.info,
|
|
||||||
jsonSchemaDialect=(
|
|
||||||
"https://json-schema.org/draft/2020-12/schema"
|
|
||||||
if self.options.version >= (3, 1, 0)
|
|
||||||
else None
|
|
||||||
),
|
|
||||||
servers=[self.options.server],
|
|
||||||
paths=paths,
|
|
||||||
components=Components(
|
|
||||||
schemas=self.schema_builder.schemas,
|
|
||||||
responses=self.responses,
|
|
||||||
securitySchemes=securitySchemes,
|
|
||||||
),
|
|
||||||
security=[{"Default": []}],
|
|
||||||
tags=tags,
|
|
||||||
tagGroups=tag_groups,
|
|
||||||
)
|
|
||||||
|
|
@ -1,424 +0,0 @@
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
|
|
||||||
import collections.abc
|
|
||||||
import enum
|
|
||||||
import inspect
|
|
||||||
import typing
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union
|
|
||||||
|
|
||||||
from llama_stack.apis.version import LLAMA_STACK_API_VERSION
|
|
||||||
|
|
||||||
from termcolor import colored
|
|
||||||
|
|
||||||
from llama_stack.strong_typing.inspection import get_signature
|
|
||||||
|
|
||||||
from typing import get_origin, get_args
|
|
||||||
|
|
||||||
from fastapi import UploadFile
|
|
||||||
from fastapi.params import File, Form
|
|
||||||
from typing import Annotated
|
|
||||||
|
|
||||||
|
|
||||||
def split_prefix(
|
|
||||||
s: str, sep: str, prefix: Union[str, Iterable[str]]
|
|
||||||
) -> Tuple[Optional[str], str]:
|
|
||||||
"""
|
|
||||||
Recognizes a prefix at the beginning of a string.
|
|
||||||
|
|
||||||
:param s: The string to check.
|
|
||||||
:param sep: A separator between (one of) the prefix(es) and the rest of the string.
|
|
||||||
:param prefix: A string or a set of strings to identify as a prefix.
|
|
||||||
:return: A tuple of the recognized prefix (if any) and the rest of the string excluding the separator (or the entire string).
|
|
||||||
"""
|
|
||||||
|
|
||||||
if isinstance(prefix, str):
|
|
||||||
if s.startswith(prefix + sep):
|
|
||||||
return prefix, s[len(prefix) + len(sep) :]
|
|
||||||
else:
|
|
||||||
return None, s
|
|
||||||
|
|
||||||
for p in prefix:
|
|
||||||
if s.startswith(p + sep):
|
|
||||||
return p, s[len(p) + len(sep) :]
|
|
||||||
|
|
||||||
return None, s
|
|
||||||
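For clarity, a hedged usage sketch of `split_prefix` as implemented above; the import path is an assumption about where this module lives.

```python
# Hedged sketch of split_prefix behavior, derived from the implementation above.
from docs.openapi_generator.pyopenapi.operations import split_prefix  # assumed path

assert split_prefix("get_object", "_", ["get", "delete"]) == ("get", "object")
assert split_prefix("health", "_", "get") == (None, "health")
```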
|
|
||||||
|
|
||||||
def _get_annotation_type(annotation: Union[type, str], callable: Callable) -> type:
|
|
||||||
"Maps a stringized reference to a type, as if using `from __future__ import annotations`."
|
|
||||||
|
|
||||||
if isinstance(annotation, str):
|
|
||||||
return eval(annotation, callable.__globals__)
|
|
||||||
else:
|
|
||||||
return annotation
|
|
||||||
|
|
||||||
|
|
||||||
class HTTPMethod(enum.Enum):
|
|
||||||
"HTTP method used to invoke an endpoint operation."
|
|
||||||
|
|
||||||
GET = "GET"
|
|
||||||
POST = "POST"
|
|
||||||
PUT = "PUT"
|
|
||||||
DELETE = "DELETE"
|
|
||||||
PATCH = "PATCH"
|
|
||||||
|
|
||||||
|
|
||||||
OperationParameter = Tuple[str, type]
|
|
||||||
|
|
||||||
|
|
||||||
class ValidationError(TypeError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class EndpointOperation:
|
|
||||||
"""
|
|
||||||
Type information and metadata associated with an endpoint operation.
|
|
||||||
|
|
||||||
"param defining_class: The most specific class that defines the endpoint operation.
|
|
||||||
:param name: The short name of the endpoint operation.
|
|
||||||
:param func_name: The name of the function to invoke when the operation is triggered.
|
|
||||||
:param func_ref: The callable to invoke when the operation is triggered.
|
|
||||||
:param route: A custom route string assigned to the operation.
|
|
||||||
:param path_params: Parameters of the operation signature that are passed in the path component of the URL string.
|
|
||||||
:param query_params: Parameters of the operation signature that are passed in the query string as `key=value` pairs.
|
|
||||||
:param request_params: The parameter that corresponds to the data transmitted in the request body.
|
|
||||||
:param multipart_params: Parameters that indicate multipart/form-data request body.
|
|
||||||
:param event_type: The Python type of the data that is transmitted out-of-band (e.g. via websockets) while the operation is in progress.
|
|
||||||
:param response_type: The Python type of the data that is transmitted in the response body.
|
|
||||||
:param http_method: The HTTP method used to invoke the endpoint such as POST, GET or PUT.
|
|
||||||
:param public: True if the operation can be invoked without prior authentication.
|
|
||||||
:param request_examples: Sample requests that the operation might take.
|
|
||||||
:param response_examples: Sample responses that the operation might produce.
|
|
||||||
"""
|
|
||||||
|
|
||||||
defining_class: type
|
|
||||||
name: str
|
|
||||||
func_name: str
|
|
||||||
func_ref: Callable[..., Any]
|
|
||||||
route: Optional[str]
|
|
||||||
path_params: List[OperationParameter]
|
|
||||||
query_params: List[OperationParameter]
|
|
||||||
request_params: Optional[OperationParameter]
|
|
||||||
multipart_params: List[OperationParameter]
|
|
||||||
event_type: Optional[type]
|
|
||||||
response_type: type
|
|
||||||
http_method: HTTPMethod
|
|
||||||
public: bool
|
|
||||||
request_examples: Optional[List[Any]] = None
|
|
||||||
response_examples: Optional[List[Any]] = None
|
|
||||||
|
|
||||||
def get_route(self) -> str:
|
|
||||||
if self.route is not None:
|
|
||||||
return "/".join(["", LLAMA_STACK_API_VERSION, self.route.lstrip("/")])
|
|
||||||
|
|
||||||
route_parts = ["", LLAMA_STACK_API_VERSION, self.name]
|
|
||||||
for param_name, _ in self.path_params:
|
|
||||||
route_parts.append("{" + param_name + "}")
|
|
||||||
return "/".join(route_parts)
|
|
||||||
|
|
||||||
|
|
||||||
class _FormatParameterExtractor:
|
|
||||||
"A visitor to exract parameters in a format string."
|
|
||||||
|
|
||||||
keys: List[str]
|
|
||||||
|
|
||||||
def __init__(self) -> None:
|
|
||||||
self.keys = []
|
|
||||||
|
|
||||||
def __getitem__(self, key: str) -> None:
|
|
||||||
self.keys.append(key)
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _get_route_parameters(route: str) -> List[str]:
|
|
||||||
extractor = _FormatParameterExtractor()
|
|
||||||
# Replace all occurrences of ":path" with empty string
|
|
||||||
route = route.replace(":path", "")
|
|
||||||
route.format_map(extractor)
|
|
||||||
return extractor.keys
|
|
||||||
|
|
||||||
|
|
||||||
def _get_endpoint_functions(
|
|
||||||
endpoint: type, prefixes: List[str]
|
|
||||||
) -> Iterator[Tuple[str, str, str, Callable]]:
|
|
||||||
if not inspect.isclass(endpoint):
|
|
||||||
raise ValueError(f"object is not a class type: {endpoint}")
|
|
||||||
|
|
||||||
functions = inspect.getmembers(endpoint, inspect.isfunction)
|
|
||||||
for func_name, func_ref in functions:
|
|
||||||
webmethod = getattr(func_ref, "__webmethod__", None)
|
|
||||||
if not webmethod:
|
|
||||||
continue
|
|
||||||
|
|
||||||
print(f"Processing {colored(func_name, 'white')}...")
|
|
||||||
operation_name = func_name
|
|
||||||
|
|
||||||
if webmethod.method == "GET":
|
|
||||||
prefix = "get"
|
|
||||||
elif webmethod.method == "DELETE":
|
|
||||||
prefix = "delete"
|
|
||||||
elif webmethod.method == "POST":
|
|
||||||
prefix = "post"
|
|
||||||
elif operation_name.startswith("get_") or operation_name.endswith("/get"):
|
|
||||||
prefix = "get"
|
|
||||||
elif (
|
|
||||||
operation_name.startswith("delete_")
|
|
||||||
or operation_name.startswith("remove_")
|
|
||||||
or operation_name.endswith("/delete")
|
|
||||||
or operation_name.endswith("/remove")
|
|
||||||
):
|
|
||||||
prefix = "delete"
|
|
||||||
else:
|
|
||||||
# by default everything else is a POST
|
|
||||||
prefix = "post"
|
|
||||||
|
|
||||||
yield prefix, operation_name, func_name, func_ref
|
|
||||||
|
|
||||||
|
|
||||||
def _get_defining_class(member_fn: str, derived_cls: type) -> type:
|
|
||||||
"Find the class in which a member function is first defined in a class inheritance hierarchy."
|
|
||||||
|
|
||||||
# This import must be dynamic here
|
|
||||||
from llama_stack.apis.tools import RAGToolRuntime, ToolRuntime
|
|
||||||
|
|
||||||
# iterate in reverse member resolution order to find most specific class first
|
|
||||||
for cls in reversed(inspect.getmro(derived_cls)):
|
|
||||||
for name, _ in inspect.getmembers(cls, inspect.isfunction):
|
|
||||||
if name == member_fn:
|
|
||||||
# HACK ALERT
|
|
||||||
if cls == RAGToolRuntime:
|
|
||||||
return ToolRuntime
|
|
||||||
return cls
|
|
||||||
|
|
||||||
raise ValidationError(
|
|
||||||
f"cannot find defining class for {member_fn} in {derived_cls}"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def get_endpoint_operations(
|
|
||||||
endpoint: type, use_examples: bool = True
|
|
||||||
) -> List[EndpointOperation]:
|
|
||||||
"""
|
|
||||||
Extracts a list of member functions in a class eligible for HTTP interface binding.
|
|
||||||
|
|
||||||
These member functions are expected to have a signature like
|
|
||||||
```
|
|
||||||
async def get_object(self, uuid: str, version: int) -> Object:
|
|
||||||
...
|
|
||||||
```
|
|
||||||
where the prefix `get_` translates to an HTTP GET, `object` corresponds to the name of the endpoint operation,
|
|
||||||
`uuid` and `version` are mapped to route path elements in "/object/{uuid}/{version}", and `Object` becomes
|
|
||||||
the response payload type, transmitted as an object serialized to JSON.
|
|
||||||
|
|
||||||
If the member function has a composite class type in the argument list, it becomes the request payload type,
|
|
||||||
and the caller is expected to provide the data as serialized JSON in an HTTP POST request.
|
|
||||||
|
|
||||||
:param endpoint: A class with member functions that can be mapped to an HTTP endpoint.
|
|
||||||
:param use_examples: Whether to return examples associated with member functions.
|
|
||||||
"""
|
|
||||||
|
|
||||||
result = []
|
|
||||||
|
|
||||||
for prefix, operation_name, func_name, func_ref in _get_endpoint_functions(
|
|
||||||
endpoint,
|
|
||||||
[
|
|
||||||
"create",
|
|
||||||
"delete",
|
|
||||||
"do",
|
|
||||||
"get",
|
|
||||||
"post",
|
|
||||||
"put",
|
|
||||||
"remove",
|
|
||||||
"set",
|
|
||||||
"update",
|
|
||||||
],
|
|
||||||
):
|
|
||||||
# extract routing information from function metadata
|
|
||||||
webmethod = getattr(func_ref, "__webmethod__", None)
|
|
||||||
if webmethod is not None:
|
|
||||||
route = webmethod.route
|
|
||||||
route_params = _get_route_parameters(route) if route is not None else None
|
|
||||||
public = webmethod.public
|
|
||||||
request_examples = webmethod.request_examples
|
|
||||||
response_examples = webmethod.response_examples
|
|
||||||
else:
|
|
||||||
route = None
|
|
||||||
route_params = None
|
|
||||||
public = False
|
|
||||||
request_examples = None
|
|
||||||
response_examples = None
|
|
||||||
|
|
||||||
# inspect function signature for path and query parameters, and request/response payload type
|
|
||||||
signature = get_signature(func_ref)
|
|
||||||
|
|
||||||
path_params = []
|
|
||||||
query_params = []
|
|
||||||
request_params = []
|
|
||||||
multipart_params = []
|
|
||||||
|
|
||||||
for param_name, parameter in signature.parameters.items():
|
|
||||||
param_type = _get_annotation_type(parameter.annotation, func_ref)
|
|
||||||
|
|
||||||
# omit "self" for instance methods
|
|
||||||
if param_name == "self" and param_type is inspect.Parameter.empty:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# check if all parameters have explicit type
|
|
||||||
if parameter.annotation is inspect.Parameter.empty:
|
|
||||||
raise ValidationError(
|
|
||||||
f"parameter '{param_name}' in function '{func_name}' has no type annotation"
|
|
||||||
)
|
|
||||||
|
|
||||||
is_multipart = _is_multipart_param(param_type)
|
|
||||||
|
|
||||||
if prefix in ["get", "delete"]:
|
|
||||||
if route_params is not None and param_name in route_params:
|
|
||||||
path_params.append((param_name, param_type))
|
|
||||||
else:
|
|
||||||
query_params.append((param_name, param_type))
|
|
||||||
else:
|
|
||||||
if route_params is not None and param_name in route_params:
|
|
||||||
path_params.append((param_name, param_type))
|
|
||||||
elif is_multipart:
|
|
||||||
multipart_params.append((param_name, param_type))
|
|
||||||
else:
|
|
||||||
request_params.append((param_name, param_type))
|
|
||||||
|
|
||||||
# check if function has explicit return type
|
|
||||||
if signature.return_annotation is inspect.Signature.empty:
|
|
||||||
raise ValidationError(
|
|
||||||
f"function '{func_name}' has no return type annotation"
|
|
||||||
)
|
|
||||||
|
|
||||||
return_type = _get_annotation_type(signature.return_annotation, func_ref)
|
|
||||||
|
|
||||||
# operations that produce events are labeled as Generator[YieldType, SendType, ReturnType]
|
|
||||||
# where YieldType is the event type, SendType is None, and ReturnType is the immediate response type to the request
|
|
||||||
if typing.get_origin(return_type) is collections.abc.Generator:
|
|
||||||
event_type, send_type, response_type = typing.get_args(return_type)
|
|
||||||
if send_type is not type(None):
|
|
||||||
raise ValidationError(
|
|
||||||
f"function '{func_name}' has a return type Generator[Y,S,R] and therefore looks like an event but has an explicit send type"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
event_type = None
|
|
||||||
|
|
||||||
def process_type(t):
|
|
||||||
if typing.get_origin(t) is collections.abc.AsyncIterator:
|
|
||||||
# NOTE(ashwin): this is SSE and there is no way to represent it. either we make it a List
|
|
||||||
# or the item type. I am choosing it to be the latter
|
|
||||||
args = typing.get_args(t)
|
|
||||||
return args[0]
|
|
||||||
elif typing.get_origin(t) is typing.Union:
|
|
||||||
types = [process_type(a) for a in typing.get_args(t)]
|
|
||||||
return typing._UnionGenericAlias(typing.Union, tuple(types))
|
|
||||||
else:
|
|
||||||
return t
|
|
||||||
|
|
||||||
response_type = process_type(return_type)
|
|
||||||
|
|
||||||
if prefix in ["delete", "remove"]:
|
|
||||||
http_method = HTTPMethod.DELETE
|
|
||||||
elif prefix == "post":
|
|
||||||
http_method = HTTPMethod.POST
|
|
||||||
elif prefix == "get":
|
|
||||||
http_method = HTTPMethod.GET
|
|
||||||
elif prefix == "set":
|
|
||||||
http_method = HTTPMethod.PUT
|
|
||||||
elif prefix == "update":
|
|
||||||
http_method = HTTPMethod.PATCH
|
|
||||||
else:
|
|
||||||
raise ValidationError(f"unknown prefix {prefix}")
|
|
||||||
|
|
||||||
result.append(
|
|
||||||
EndpointOperation(
|
|
||||||
defining_class=_get_defining_class(func_name, endpoint),
|
|
||||||
name=operation_name,
|
|
||||||
func_name=func_name,
|
|
||||||
func_ref=func_ref,
|
|
||||||
route=route,
|
|
||||||
path_params=path_params,
|
|
||||||
query_params=query_params,
|
|
||||||
request_params=request_params,
|
|
||||||
multipart_params=multipart_params,
|
|
||||||
event_type=event_type,
|
|
||||||
response_type=response_type,
|
|
||||||
http_method=http_method,
|
|
||||||
public=public,
|
|
||||||
request_examples=request_examples if use_examples else None,
|
|
||||||
response_examples=response_examples if use_examples else None,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
if not result:
|
|
||||||
raise ValidationError(f"no eligible endpoint operations in type {endpoint}")
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
def get_endpoint_events(endpoint: type) -> Dict[str, type]:
|
|
||||||
results = {}
|
|
||||||
|
|
||||||
for decl in typing.get_type_hints(endpoint).values():
|
|
||||||
# check if signature is Callable[...]
|
|
||||||
origin = typing.get_origin(decl)
|
|
||||||
if origin is None or not issubclass(origin, Callable): # type: ignore
|
|
||||||
continue
|
|
||||||
|
|
||||||
# check if signature is Callable[[...], Any]
|
|
||||||
args = typing.get_args(decl)
|
|
||||||
if len(args) != 2:
|
|
||||||
continue
|
|
||||||
params_type, return_type = args
|
|
||||||
if not isinstance(params_type, list):
|
|
||||||
continue
|
|
||||||
|
|
||||||
# check if signature is Callable[[...], None]
|
|
||||||
if not issubclass(return_type, type(None)):
|
|
||||||
continue
|
|
||||||
|
|
||||||
# check if signature is Callable[[EventType], None]
|
|
||||||
if len(params_type) != 1:
|
|
||||||
continue
|
|
||||||
|
|
||||||
param_type = params_type[0]
|
|
||||||
results[param_type.__name__] = param_type
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
def _is_multipart_param(param_type: type) -> bool:
|
|
||||||
"""
|
|
||||||
Check if a parameter type indicates multipart form data.
|
|
||||||
|
|
||||||
Returns True if the type is:
|
|
||||||
- UploadFile
|
|
||||||
- Annotated[UploadFile, File()]
|
|
||||||
- Annotated[str, Form()]
|
|
||||||
- Annotated[Any, File()]
|
|
||||||
- Annotated[Any, Form()]
|
|
||||||
"""
|
|
||||||
if param_type is UploadFile:
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Check for Annotated types
|
|
||||||
origin = get_origin(param_type)
|
|
||||||
if origin is None:
|
|
||||||
return False
|
|
||||||
|
|
||||||
if origin is Annotated:
|
|
||||||
args = get_args(param_type)
|
|
||||||
if len(args) < 2:
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Check the annotations for File() or Form()
|
|
||||||
for annotation in args[1:]:
|
|
||||||
if isinstance(annotation, (File, Form)):
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
@ -1,77 +0,0 @@
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
|
|
||||||
import dataclasses
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from http import HTTPStatus
|
|
||||||
from typing import Callable, ClassVar, Dict, List, Optional, Tuple, Union
|
|
||||||
|
|
||||||
from .specification import (
|
|
||||||
Info,
|
|
||||||
SecurityScheme,
|
|
||||||
SecuritySchemeAPI,
|
|
||||||
SecuritySchemeHTTP,
|
|
||||||
SecuritySchemeOpenIDConnect,
|
|
||||||
Server,
|
|
||||||
)
|
|
||||||
|
|
||||||
HTTPStatusCode = Union[HTTPStatus, int, str]
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Options:
|
|
||||||
"""
|
|
||||||
:param server: Base URL for the API endpoint.
|
|
||||||
:param info: Meta-information for the endpoint specification.
|
|
||||||
:param version: OpenAPI specification version as a tuple of major, minor, revision.
|
|
||||||
:param default_security_scheme: Security scheme to apply to endpoints, unless overridden on a per-endpoint basis.
|
|
||||||
:param extra_types: Extra types in addition to those found in operation signatures. Use a dictionary to group related types.
|
|
||||||
:param use_examples: Whether to emit examples for operations.
|
|
||||||
:param success_responses: Associates operation response types with HTTP status codes.
|
|
||||||
:param error_responses: Associates error response types with HTTP status codes.
|
|
||||||
:param error_wrapper: True if errors are encapsulated in an error object wrapper.
|
|
||||||
:param property_description_fun: Custom transformation function to apply to class property documentation strings.
|
|
||||||
:param captions: User-defined captions for sections such as "Operations" or "Types", and (if applicable) groups of extra types.
|
|
||||||
:param include_standard_error_responses: Whether to include standard error responses (400, 429, 500, 503) in all operations.
|
|
||||||
"""
|
|
||||||
|
|
||||||
server: Server
|
|
||||||
info: Info
|
|
||||||
version: Tuple[int, int, int] = (3, 1, 0)
|
|
||||||
default_security_scheme: Optional[SecurityScheme] = None
|
|
||||||
extra_types: Union[List[type], Dict[str, List[type]], None] = None
|
|
||||||
use_examples: bool = True
|
|
||||||
success_responses: Dict[type, HTTPStatusCode] = dataclasses.field(
|
|
||||||
default_factory=dict
|
|
||||||
)
|
|
||||||
error_responses: Dict[type, HTTPStatusCode] = dataclasses.field(
|
|
||||||
default_factory=dict
|
|
||||||
)
|
|
||||||
error_wrapper: bool = False
|
|
||||||
property_description_fun: Optional[Callable[[type, str, str], str]] = None
|
|
||||||
captions: Optional[Dict[str, str]] = None
|
|
||||||
include_standard_error_responses: bool = True
|
|
||||||
|
|
||||||
default_captions: ClassVar[Dict[str, str]] = {
|
|
||||||
"Operations": "Operations",
|
|
||||||
"Types": "Types",
|
|
||||||
"Events": "Events",
|
|
||||||
"AdditionalTypes": "Additional types",
|
|
||||||
}
|
|
||||||
|
|
||||||
def map(self, id: str) -> str:
|
|
||||||
"Maps a language-neutral placeholder string to language-dependent text."
|
|
||||||
|
|
||||||
if self.captions is not None:
|
|
||||||
caption = self.captions.get(id)
|
|
||||||
if caption is not None:
|
|
||||||
return caption
|
|
||||||
|
|
||||||
caption = self.__class__.default_captions.get(id)
|
|
||||||
if caption is not None:
|
|
||||||
return caption
|
|
||||||
|
|
||||||
raise KeyError(f"no caption found for ID: {id}")
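# Illustrative construction sketch (values are hypothetical, not taken from
# the generator's own configuration):
#
#   options = Options(
#       server=Server(url="http://localhost:8321"),
#       info=Info(title="Example API", version="0.1.0"),
#       default_security_scheme=SecuritySchemeHTTP(
#           description="Bearer token authentication", scheme="bearer"
#       ),
#       include_standard_error_responses=True,
#   )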
|
|
||||||
|
|
@ -1,259 +0,0 @@
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
|
|
||||||
import dataclasses
|
|
||||||
import enum
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from typing import Any, ClassVar, Dict, List, Optional, Union
|
|
||||||
|
|
||||||
from llama_stack.strong_typing.schema import JsonType, Schema, StrictJsonType
|
|
||||||
|
|
||||||
URL = str
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Ref:
|
|
||||||
ref_type: ClassVar[str]
|
|
||||||
id: str
|
|
||||||
|
|
||||||
def to_json(self) -> StrictJsonType:
|
|
||||||
return {"$ref": f"#/components/{self.ref_type}/{self.id}"}
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class SchemaRef(Ref):
|
|
||||||
ref_type: ClassVar[str] = "schemas"
|
|
||||||
|
|
||||||
|
|
||||||
SchemaOrRef = Union[Schema, SchemaRef]
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class ResponseRef(Ref):
|
|
||||||
ref_type: ClassVar[str] = "responses"
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class ParameterRef(Ref):
|
|
||||||
ref_type: ClassVar[str] = "parameters"
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class ExampleRef(Ref):
|
|
||||||
ref_type: ClassVar[str] = "examples"
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Contact:
|
|
||||||
name: Optional[str] = None
|
|
||||||
url: Optional[URL] = None
|
|
||||||
email: Optional[str] = None
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class License:
|
|
||||||
name: str
|
|
||||||
url: Optional[URL] = None
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Info:
|
|
||||||
title: str
|
|
||||||
version: str
|
|
||||||
description: Optional[str] = None
|
|
||||||
termsOfService: Optional[str] = None
|
|
||||||
contact: Optional[Contact] = None
|
|
||||||
license: Optional[License] = None
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class MediaType:
|
|
||||||
schema: Optional[SchemaOrRef] = None
|
|
||||||
example: Optional[Any] = None
|
|
||||||
examples: Optional[Dict[str, Union["Example", ExampleRef]]] = None
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class RequestBody:
|
|
||||||
content: Dict[str, MediaType | Dict[str, Any]]
|
|
||||||
description: Optional[str] = None
|
|
||||||
required: Optional[bool] = None
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Response:
|
|
||||||
description: str
|
|
||||||
content: Optional[Dict[str, MediaType]] = None
|
|
||||||
|
|
||||||
|
|
||||||
class ParameterLocation(enum.Enum):
|
|
||||||
Query = "query"
|
|
||||||
Header = "header"
|
|
||||||
Path = "path"
|
|
||||||
Cookie = "cookie"
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Parameter:
|
|
||||||
name: str
|
|
||||||
in_: ParameterLocation
|
|
||||||
description: Optional[str] = None
|
|
||||||
required: Optional[bool] = None
|
|
||||||
schema: Optional[SchemaOrRef] = None
|
|
||||||
example: Optional[Any] = None
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Operation:
|
|
||||||
responses: Dict[str, Union[Response, ResponseRef]]
|
|
||||||
tags: Optional[List[str]] = None
|
|
||||||
summary: Optional[str] = None
|
|
||||||
description: Optional[str] = None
|
|
||||||
operationId: Optional[str] = None
|
|
||||||
parameters: Optional[List[Parameter]] = None
|
|
||||||
requestBody: Optional[RequestBody] = None
|
|
||||||
callbacks: Optional[Dict[str, "Callback"]] = None
|
|
||||||
security: Optional[List["SecurityRequirement"]] = None
|
|
||||||
deprecated: Optional[bool] = None
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class PathItem:
|
|
||||||
summary: Optional[str] = None
|
|
||||||
description: Optional[str] = None
|
|
||||||
get: Optional[Operation] = None
|
|
||||||
put: Optional[Operation] = None
|
|
||||||
post: Optional[Operation] = None
|
|
||||||
delete: Optional[Operation] = None
|
|
||||||
options: Optional[Operation] = None
|
|
||||||
head: Optional[Operation] = None
|
|
||||||
patch: Optional[Operation] = None
|
|
||||||
trace: Optional[Operation] = None
|
|
||||||
|
|
||||||
def update(self, other: "PathItem") -> None:
|
|
||||||
"Merges another instance of this class into this object."
|
|
||||||
|
|
||||||
for field in dataclasses.fields(self.__class__):
|
|
||||||
value = getattr(other, field.name)
|
|
||||||
if value is not None:
|
|
||||||
setattr(self, field.name, value)
|
|
||||||
|
|
||||||
|
|
||||||
# maps run-time expressions such as "$request.body#/url" to path items
|
|
||||||
Callback = Dict[str, PathItem]
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Example:
|
|
||||||
summary: Optional[str] = None
|
|
||||||
description: Optional[str] = None
|
|
||||||
value: Optional[Any] = None
|
|
||||||
externalValue: Optional[URL] = None
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Server:
|
|
||||||
url: URL
|
|
||||||
description: Optional[str] = None
|
|
||||||
|
|
||||||
|
|
||||||
class SecuritySchemeType(enum.Enum):
|
|
||||||
ApiKey = "apiKey"
|
|
||||||
HTTP = "http"
|
|
||||||
OAuth2 = "oauth2"
|
|
||||||
OpenIDConnect = "openIdConnect"
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class SecurityScheme:
|
|
||||||
type: SecuritySchemeType
|
|
||||||
description: str
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(init=False)
|
|
||||||
class SecuritySchemeAPI(SecurityScheme):
|
|
||||||
name: str
|
|
||||||
in_: ParameterLocation
|
|
||||||
|
|
||||||
def __init__(self, description: str, name: str, in_: ParameterLocation) -> None:
|
|
||||||
super().__init__(SecuritySchemeType.ApiKey, description)
|
|
||||||
self.name = name
|
|
||||||
self.in_ = in_
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(init=False)
|
|
||||||
class SecuritySchemeHTTP(SecurityScheme):
|
|
||||||
scheme: str
|
|
||||||
bearerFormat: Optional[str] = None
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self, description: str, scheme: str, bearerFormat: Optional[str] = None
|
|
||||||
) -> None:
|
|
||||||
super().__init__(SecuritySchemeType.HTTP, description)
|
|
||||||
self.scheme = scheme
|
|
||||||
self.bearerFormat = bearerFormat
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(init=False)
|
|
||||||
class SecuritySchemeOpenIDConnect(SecurityScheme):
|
|
||||||
openIdConnectUrl: str
|
|
||||||
|
|
||||||
def __init__(self, description: str, openIdConnectUrl: str) -> None:
|
|
||||||
super().__init__(SecuritySchemeType.OpenIDConnect, description)
|
|
||||||
self.openIdConnectUrl = openIdConnectUrl
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Components:
|
|
||||||
schemas: Optional[Dict[str, Schema]] = None
|
|
||||||
responses: Optional[Dict[str, Response]] = None
|
|
||||||
parameters: Optional[Dict[str, Parameter]] = None
|
|
||||||
examples: Optional[Dict[str, Example]] = None
|
|
||||||
requestBodies: Optional[Dict[str, RequestBody]] = None
|
|
||||||
securitySchemes: Optional[Dict[str, SecurityScheme]] = None
|
|
||||||
callbacks: Optional[Dict[str, Callback]] = None
|
|
||||||
|
|
||||||
|
|
||||||
SecurityScope = str
|
|
||||||
SecurityRequirement = Dict[str, List[SecurityScope]]
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Tag:
|
|
||||||
name: str
|
|
||||||
description: Optional[str] = None
|
|
||||||
displayName: Optional[str] = None
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class TagGroup:
|
|
||||||
"""
|
|
||||||
A ReDoc extension to provide information about groups of tags.
|
|
||||||
|
|
||||||
Exposed via the vendor-specific property "x-tagGroups" of the top-level object.
|
|
||||||
"""
|
|
||||||
|
|
||||||
name: str
|
|
||||||
tags: List[str]
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Document:
|
|
||||||
"""
|
|
||||||
This class is a Python dataclass adaptation of the OpenAPI Specification.
|
|
||||||
|
|
||||||
For details, see <https://swagger.io/specification/>
|
|
||||||
"""
|
|
||||||
|
|
||||||
openapi: str
|
|
||||||
info: Info
|
|
||||||
servers: List[Server]
|
|
||||||
paths: Dict[str, PathItem]
|
|
||||||
jsonSchemaDialect: Optional[str] = None
|
|
||||||
components: Optional[Components] = None
|
|
||||||
security: Optional[List[SecurityRequirement]] = None
|
|
||||||
tags: Optional[List[Tag]] = None
|
|
||||||
tagGroups: Optional[List[TagGroup]] = None
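# Illustrative sketch of a minimal document (values are hypothetical):
#
#   doc = Document(
#       openapi="3.1.0",
#       info=Info(title="Example API", version="0.1.0"),
#       servers=[Server(url="http://localhost:8321")],
#       paths={},
#   )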
|
|
||||||
|
|
@ -1,41 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html>
|
|
||||||
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
||||||
<title>OpenAPI specification</title>
|
|
||||||
<link href="https://fonts.googleapis.com/css?family=Montserrat:300,400,700|Roboto:300,400,700" rel="stylesheet">
|
|
||||||
<script type="module" src="https://cdn.jsdelivr.net/npm/@stoplight/elements/web-components.min.js"></script>
|
|
||||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@stoplight/elements/styles.min.css">
|
|
||||||
<style>
|
|
||||||
body {
|
|
||||||
margin: 0;
|
|
||||||
padding: 0;
|
|
||||||
height: 100vh;
|
|
||||||
}
|
|
||||||
|
|
||||||
elements-api {
|
|
||||||
height: 100%;
|
|
||||||
}
|
|
||||||
</style>
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body>
|
|
||||||
<elements-api id="openapi-container" router="hash" layout="sidebar" hideExport="true"
|
|
||||||
hideInternal="true"></elements-api>
|
|
||||||
|
|
||||||
<script>
|
|
||||||
document.addEventListener("DOMContentLoaded", function () {
|
|
||||||
const spec = { /* OPENAPI_SPECIFICATION */ };
|
|
||||||
const element = document.getElementById("openapi-container");
|
|
||||||
element.apiDescriptionDocument = spec;
|
|
||||||
|
|
||||||
if (spec.info && spec.info.title) {
|
|
||||||
document.title = spec.info.title;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
</body>
|
|
||||||
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,268 +0,0 @@
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
|
|
||||||
import json
|
|
||||||
import typing
|
|
||||||
import inspect
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import TextIO
|
|
||||||
from typing import Any, List, Optional, Union, get_type_hints, get_origin, get_args
|
|
||||||
|
|
||||||
from llama_stack.strong_typing.schema import object_to_json, StrictJsonType
|
|
||||||
from llama_stack.distribution.resolver import api_protocol_map
|
|
||||||
|
|
||||||
from .generator import Generator
|
|
||||||
from .options import Options
|
|
||||||
from .specification import Document
|
|
||||||
|
|
||||||
THIS_DIR = Path(__file__).parent
|
|
||||||
|
|
||||||
|
|
||||||
class Specification:
|
|
||||||
document: Document
|
|
||||||
|
|
||||||
def __init__(self, endpoint: type, options: Options):
|
|
||||||
generator = Generator(endpoint, options)
|
|
||||||
self.document = generator.generate()
|
|
||||||
|
|
||||||
def get_json(self) -> StrictJsonType:
|
|
||||||
"""
|
|
||||||
Returns the OpenAPI specification as a Python data type (e.g. `dict` for an object, `list` for an array).
|
|
||||||
|
|
||||||
The result can be serialized to a JSON string with `json.dump` or `json.dumps`.
|
|
||||||
"""
|
|
||||||
|
|
||||||
json_doc = typing.cast(StrictJsonType, object_to_json(self.document))
|
|
||||||
|
|
||||||
if isinstance(json_doc, dict):
|
|
||||||
# rename vendor-specific properties
|
|
||||||
tag_groups = json_doc.pop("tagGroups", None)
|
|
||||||
if tag_groups:
|
|
||||||
json_doc["x-tagGroups"] = tag_groups
|
|
||||||
tags = json_doc.get("tags")
|
|
||||||
if tags and isinstance(tags, list):
|
|
||||||
for tag in tags:
|
|
||||||
if not isinstance(tag, dict):
|
|
||||||
continue
|
|
||||||
|
|
||||||
display_name = tag.pop("displayName", None)
|
|
||||||
if display_name:
|
|
||||||
tag["x-displayName"] = display_name
|
|
||||||
|
|
||||||
return json_doc
|
|
||||||
|
|
||||||
def get_json_string(self, pretty_print: bool = False) -> str:
|
|
||||||
"""
|
|
||||||
Returns the OpenAPI specification as a JSON string.
|
|
||||||
|
|
||||||
:param pretty_print: Whether to use line indents to beautify the output.
|
|
||||||
"""
|
|
||||||
|
|
||||||
json_doc = self.get_json()
|
|
||||||
if pretty_print:
|
|
||||||
return json.dumps(
|
|
||||||
json_doc, check_circular=False, ensure_ascii=False, indent=4
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
return json.dumps(
|
|
||||||
json_doc,
|
|
||||||
check_circular=False,
|
|
||||||
ensure_ascii=False,
|
|
||||||
separators=(",", ":"),
|
|
||||||
)
|
|
||||||
|
|
||||||
def write_json(self, f: TextIO, pretty_print: bool = False) -> None:
|
|
||||||
"""
|
|
||||||
Writes the OpenAPI specification to a file as a JSON string.
|
|
||||||
|
|
||||||
:param pretty_print: Whether to use line indents to beautify the output.
|
|
||||||
"""
|
|
||||||
|
|
||||||
json_doc = self.get_json()
|
|
||||||
if pretty_print:
|
|
||||||
json.dump(
|
|
||||||
json_doc,
|
|
||||||
f,
|
|
||||||
check_circular=False,
|
|
||||||
ensure_ascii=False,
|
|
||||||
indent=4,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
json.dump(
|
|
||||||
json_doc,
|
|
||||||
f,
|
|
||||||
check_circular=False,
|
|
||||||
ensure_ascii=False,
|
|
||||||
separators=(",", ":"),
|
|
||||||
)
|
|
||||||
|
|
||||||
def write_html(self, f: TextIO, pretty_print: bool = False) -> None:
|
|
||||||
"""
|
|
||||||
Creates a stand-alone HTML page for the OpenAPI specification with Stoplight Elements.
|
|
||||||
|
|
||||||
:param pretty_print: Whether to use line indents to beautify the JSON string in the HTML file.
|
|
||||||
"""
|
|
||||||
|
|
||||||
path = THIS_DIR / "template.html"
|
|
||||||
with path.open(encoding="utf-8", errors="strict") as html_template_file:
|
|
||||||
html_template = html_template_file.read()
|
|
||||||
|
|
||||||
html = html_template.replace(
|
|
||||||
"{ /* OPENAPI_SPECIFICATION */ }",
|
|
||||||
self.get_json_string(pretty_print=pretty_print),
|
|
||||||
)
|
|
||||||
|
|
||||||
f.write(html)
|
|
||||||
|
|
||||||
def is_optional_type(type_: Any) -> bool:
|
|
||||||
"""Check if a type is Optional."""
|
|
||||||
origin = get_origin(type_)
|
|
||||||
args = get_args(type_)
|
|
||||||
return origin is Optional or (origin is Union and type(None) in args)
|
|
||||||
|
|
||||||
|
|
||||||
def _validate_api_method_return_type(method) -> str | None:
|
|
||||||
hints = get_type_hints(method)
|
|
||||||
|
|
||||||
if 'return' not in hints:
|
|
||||||
return "has no return type annotation"
|
|
||||||
|
|
||||||
return_type = hints['return']
|
|
||||||
if is_optional_type(return_type):
|
|
||||||
return "returns Optional type where a return value is mandatory"
|
|
||||||
|
|
||||||
|
|
||||||
def _validate_api_method_doesnt_return_list(method) -> str | None:
|
|
||||||
hints = get_type_hints(method)
|
|
||||||
|
|
||||||
if 'return' not in hints:
|
|
||||||
return "has no return type annotation"
|
|
||||||
|
|
||||||
return_type = hints['return']
|
|
||||||
if get_origin(return_type) is list:
|
|
||||||
return "returns a list where a PaginatedResponse or List*Response object is expected"
|
|
||||||
|
|
||||||
|
|
||||||
def _validate_api_delete_method_returns_none(method) -> str | None:
|
|
||||||
hints = get_type_hints(method)
|
|
||||||
|
|
||||||
if 'return' not in hints:
|
|
||||||
return "has no return type annotation"
|
|
||||||
|
|
||||||
return_type = hints['return']
|
|
||||||
|
|
||||||
# Allow OpenAI endpoints to return response objects since they follow OpenAI specification
|
|
||||||
method_name = getattr(method, '__name__', '')
|
|
||||||
if method_name.startswith('openai_'):
|
|
||||||
return None
|
|
||||||
|
|
||||||
if return_type is not None and return_type is not type(None):
|
|
||||||
return "does not return None where None is mandatory"
|
|
||||||
|
|
||||||
|
|
||||||
def _validate_list_parameters_contain_data(method) -> str | None:
|
|
||||||
hints = get_type_hints(method)
|
|
||||||
|
|
||||||
if 'return' not in hints:
|
|
||||||
return "has no return type annotation"
|
|
||||||
|
|
||||||
return_type = hints['return']
|
|
||||||
if not inspect.isclass(return_type):
|
|
||||||
return
|
|
||||||
|
|
||||||
if not return_type.__name__.startswith('List'):
|
|
||||||
return
|
|
||||||
|
|
||||||
if 'data' not in return_type.model_fields:
|
|
||||||
return "does not have a mandatory data attribute containing the list of objects"
|
|
||||||
|
|
||||||
|
|
||||||
def _validate_has_ellipsis(method) -> str | None:
|
|
||||||
source = inspect.getsource(method)
|
|
||||||
if "..." not in source and not "NotImplementedError" in source:
|
|
||||||
return "does not contain ellipsis (...) in its implementation"
|
|
||||||
|
|
||||||
def _validate_has_return_in_docstring(method) -> str | None:
|
|
||||||
source = inspect.getsource(method)
|
|
||||||
return_type = method.__annotations__.get('return')
|
|
||||||
if return_type is not None and return_type != type(None) and ":returns:" not in source:
|
|
||||||
return "does not have a ':returns:' in its docstring"
|
|
||||||
|
|
||||||
def _validate_has_params_in_docstring(method) -> str | None:
|
|
||||||
source = inspect.getsource(method)
|
|
||||||
sig = inspect.signature(method)
|
|
||||||
# Only check if the method has more than one parameter
|
|
||||||
if len(sig.parameters) > 1 and ":param" not in source:
|
|
||||||
return "does not have a ':param' in its docstring"
|
|
||||||
|
|
||||||
def _validate_has_no_return_none_in_docstring(method) -> str | None:
|
|
||||||
source = inspect.getsource(method)
|
|
||||||
return_type = method.__annotations__.get('return')
|
|
||||||
if return_type is None and ":returns: None" in source:
|
|
||||||
return "has a ':returns: None' in its docstring which is redundant for None-returning functions"
|
|
||||||
|
|
||||||
def _validate_docstring_lines_end_with_dot(method) -> str | None:
|
|
||||||
docstring = inspect.getdoc(method)
|
|
||||||
if docstring is None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
lines = docstring.split('\n')
|
|
||||||
for line in lines:
|
|
||||||
line = line.strip()
|
|
||||||
if line and not any(line.endswith(char) for char in '.:{}[]()",'):
|
|
||||||
return f"docstring line '{line}' does not end with a valid character: . : {{ }} [ ] ( ) , \""
|
|
||||||
|
|
||||||
_VALIDATORS = {
|
|
||||||
"GET": [
|
|
||||||
_validate_api_method_return_type,
|
|
||||||
_validate_list_parameters_contain_data,
|
|
||||||
_validate_api_method_doesnt_return_list,
|
|
||||||
_validate_has_ellipsis,
|
|
||||||
_validate_has_return_in_docstring,
|
|
||||||
_validate_has_params_in_docstring,
|
|
||||||
_validate_docstring_lines_end_with_dot,
|
|
||||||
],
|
|
||||||
"DELETE": [
|
|
||||||
_validate_api_delete_method_returns_none,
|
|
||||||
_validate_has_ellipsis,
|
|
||||||
_validate_has_return_in_docstring,
|
|
||||||
_validate_has_params_in_docstring,
|
|
||||||
_validate_has_no_return_none_in_docstring
|
|
||||||
],
|
|
||||||
"POST": [
|
|
||||||
_validate_has_ellipsis,
|
|
||||||
_validate_has_return_in_docstring,
|
|
||||||
_validate_has_params_in_docstring,
|
|
||||||
_validate_has_no_return_none_in_docstring,
|
|
||||||
_validate_docstring_lines_end_with_dot,
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def _get_methods_by_type(protocol, method_type: str):
|
|
||||||
members = inspect.getmembers(protocol, predicate=inspect.isfunction)
|
|
||||||
return {
|
|
||||||
method_name: method
|
|
||||||
for method_name, method in members
|
|
||||||
if (webmethod := getattr(method, '__webmethod__', None))
|
|
||||||
if webmethod.method == method_type
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def validate_api() -> List[str]:
|
|
||||||
"""Validate the API protocols."""
|
|
||||||
errors = []
|
|
||||||
protocols = api_protocol_map()
|
|
||||||
|
|
||||||
for target, validators in _VALIDATORS.items():
|
|
||||||
for protocol_name, protocol in protocols.items():
|
|
||||||
for validator in validators:
|
|
||||||
for method_name, method in _get_methods_by_type(protocol, target).items():
|
|
||||||
err = validator(method)
|
|
||||||
if err:
|
|
||||||
errors.append(f"Method {protocol_name}.{method_name} {err}")
|
|
||||||
|
|
||||||
return errors
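

# Minimal usage sketch (not part of the original module): running the module
# directly prints any validation errors found in the API protocols.
if __name__ == "__main__":
    for error in validate_api():
        print(error)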
|
|
||||||
|
|
@ -1,32 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
|
|
||||||
PYTHONPATH=${PYTHONPATH:-}
|
|
||||||
THIS_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" && pwd)"
|
|
||||||
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
missing_packages=()
|
|
||||||
|
|
||||||
check_package() {
|
|
||||||
if ! pip show "$1" &>/dev/null; then
|
|
||||||
missing_packages+=("$1")
|
|
||||||
fi
|
|
||||||
}
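# NOTE: each required package is expected to be registered here before the
# check below, e.g. (hypothetical package names; adjust to the generator's
# actual dependencies):
#   check_package fire
#   check_package pydantic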
|
|
||||||
|
|
||||||
if [ ${#missing_packages[@]} -ne 0 ]; then
|
|
||||||
echo "Error: The following package(s) are not installed:"
|
|
||||||
printf " - %s\n" "${missing_packages[@]}"
|
|
||||||
echo "Please install them using:"
|
|
||||||
echo "pip install ${missing_packages[*]}"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
stack_dir=$(dirname "$(dirname "$THIS_DIR")")
|
|
||||||
PYTHONPATH=$PYTHONPATH:$stack_dir \
|
|
||||||
python -m docs.openapi_generator.generate "$(dirname "$THIS_DIR")/_static"
|
|
||||||
|
|
@ -1,19 +0,0 @@
|
||||||
# Llama Stack Documentation
|
|
||||||
|
|
||||||
Here's a collection of comprehensive guides, examples, and resources for building AI applications with Llama Stack. For the complete documentation, visit our [ReadTheDocs page](https://llama-stack.readthedocs.io/en/latest/index.html).
|
|
||||||
|
|
||||||
## Render locally
|
|
||||||
|
|
||||||
From the llama-stack root directory, run the following command to render the docs locally:
|
|
||||||
```bash
|
|
||||||
uv run --group docs sphinx-autobuild docs/source docs/build/html --write-all
|
|
||||||
```
|
|
||||||
You can open up the docs in your browser at http://localhost:8000
|
|
||||||
|
|
||||||
## Content
|
|
||||||
|
|
||||||
Try out Llama Stack's capabilities through our detailed Jupyter notebooks:
|
|
||||||
|
|
||||||
* [Building AI Applications Notebook](./getting_started.ipynb) - A comprehensive guide to building production-ready AI applications using Llama Stack
|
|
||||||
* [Benchmark Evaluations Notebook](./notebooks/Llama_Stack_Benchmark_Evals.ipynb) - Detailed performance evaluations and benchmarking results
|
|
||||||
* [Zero-to-Hero Guide](./zero_to_hero_guide) - Step-by-step guide for getting started with Llama Stack
|
|
||||||
|
|
|
@ -1,92 +0,0 @@
|
||||||
# Agents
|
|
||||||
|
|
||||||
An Agent in Llama Stack is a powerful abstraction that allows you to build complex AI applications.
|
|
||||||
|
|
||||||
The Llama Stack agent framework is built on a modular architecture that allows for flexible and powerful AI
|
|
||||||
applications. This document explains the key components and how they work together.
|
|
||||||
|
|
||||||
## Core Concepts
|
|
||||||
|
|
||||||
### 1. Agent Configuration
|
|
||||||
|
|
||||||
Agents are configured using the `AgentConfig` class, which includes:
|
|
||||||
|
|
||||||
- **Model**: The underlying LLM to power the agent
|
|
||||||
- **Instructions**: System prompt that defines the agent's behavior
|
|
||||||
- **Tools**: Capabilities the agent can use to interact with external systems
|
|
||||||
- **Safety Shields**: Guardrails to ensure responsible AI behavior
|
|
||||||
|
|
||||||
```python
|
|
||||||
from llama_stack_client import Agent
|
|
||||||
|
|
||||||
|
|
||||||
# Create the agent
|
|
||||||
agent = Agent(
|
|
||||||
llama_stack_client,
|
|
||||||
model="meta-llama/Llama-3-70b-chat",
|
|
||||||
instructions="You are a helpful assistant that can use tools to answer questions.",
|
|
||||||
tools=["builtin::code_interpreter", "builtin::rag/knowledge_search"],
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
### 2. Sessions
|
|
||||||
|
|
||||||
Agents maintain state through sessions, each of which represents a conversation thread:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Create a session
|
|
||||||
session_id = agent.create_session(session_name="My conversation")
|
|
||||||
```
|
|
||||||
|
|
||||||
### 3. Turns
|
|
||||||
|
|
||||||
Each interaction with an agent is called a "turn" and consists of:
|
|
||||||
|
|
||||||
- **Input Messages**: What the user sends to the agent
|
|
||||||
- **Steps**: The agent's internal processing (inference, tool execution, etc.)
|
|
||||||
- **Output Message**: The agent's response
|
|
||||||
|
|
||||||
```python
|
|
||||||
from llama_stack_client import AgentEventLogger
|
|
||||||
|
|
||||||
# Create a turn with streaming response
|
|
||||||
turn_response = agent.create_turn(
|
|
||||||
session_id=session_id,
|
|
||||||
messages=[{"role": "user", "content": "Tell me about Llama models"}],
|
|
||||||
)
|
|
||||||
for log in AgentEventLogger().log(turn_response):
|
|
||||||
log.print()
|
|
||||||
```
|
|
||||||
### Non-Streaming
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
```python
|
|
||||||
from rich.pretty import pprint
|
|
||||||
|
|
||||||
# Non-streaming API
|
|
||||||
response = agent.create_turn(
|
|
||||||
session_id=session_id,
|
|
||||||
messages=[{"role": "user", "content": "Tell me about Llama models"}],
|
|
||||||
stream=False,
|
|
||||||
)
|
|
||||||
print("Inputs:")
|
|
||||||
pprint(response.input_messages)
|
|
||||||
print("Output:")
|
|
||||||
pprint(response.output_message.content)
|
|
||||||
print("Steps:")
|
|
||||||
pprint(response.steps)
|
|
||||||
```
|
|
||||||
|
|
||||||
### 4. Steps
|
|
||||||
|
|
||||||
Each turn consists of multiple steps that represent the agent's thought process:
|
|
||||||
|
|
||||||
- **Inference Steps**: The agent generating text responses
|
|
||||||
- **Tool Execution Steps**: The agent using tools to gather information
|
|
||||||
- **Shield Call Steps**: Safety checks being performed
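
With the non-streaming API shown above, you can inspect these steps programmatically. A rough sketch (the step type strings follow the conventions used elsewhere in these docs; `shield_call` is an assumption):

```python
# Categorize the steps of a non-streaming turn response
for step in response.steps:
    if step.step_type == "inference":
        print("Inference step")
    elif step.step_type == "tool_execution":
        print("Tool execution step:", [call.tool_name for call in step.tool_calls])
    elif step.step_type == "shield_call":
        print("Shield call step")
```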
|
|
||||||
|
|
||||||
## Agent Execution Loop
|
|
||||||
|
|
||||||
|
|
||||||
Refer to the [Agent Execution Loop](agent_execution_loop) for more details on what happens within an agent turn.
|
|
||||||
|
|
@ -1,139 +0,0 @@
|
||||||
## Agent Execution Loop
|
|
||||||
|
|
||||||
Agents are the heart of Llama Stack applications. They combine inference, memory, safety, and tool usage into coherent
|
|
||||||
workflows. At its core, an agent follows a sophisticated execution loop that enables multi-step reasoning, tool usage,
|
|
||||||
and safety checks.
|
|
||||||
|
|
||||||
### Steps in the Agent Workflow
|
|
||||||
|
|
||||||
Each agent turn follows these key steps:
|
|
||||||
|
|
||||||
1. **Initial Safety Check**: The user's input is first screened through configured safety shields
|
|
||||||
|
|
||||||
2. **Context Retrieval**:
|
|
||||||
- If RAG is enabled, the agent can choose to query relevant documents from memory banks. You can use the `instructions` field to steer the agent.
|
|
||||||
- For new documents, they are first inserted into the memory bank.
|
|
||||||
- Retrieved context is provided to the LLM as a tool response in the message history.
|
|
||||||
|
|
||||||
3. **Inference Loop**: The agent enters its main execution loop:
|
|
||||||
- The LLM receives a user prompt (with previous tool outputs)
|
|
||||||
- The LLM generates a response, potentially with [tool calls](tools)
|
|
||||||
- If tool calls are present:
|
|
||||||
- Tool inputs are safety-checked
|
|
||||||
- Tools are executed (e.g., web search, code execution)
|
|
||||||
- Tool responses are fed back to the LLM for synthesis
|
|
||||||
- The loop continues until:
|
|
||||||
- The LLM provides a final response without tool calls
|
|
||||||
- Maximum iterations are reached
|
|
||||||
- Token limit is exceeded
|
|
||||||
|
|
||||||
4. **Final Safety Check**: The agent's final response is screened through safety shields
|
|
||||||
|
|
||||||
```{mermaid}
|
|
||||||
sequenceDiagram
|
|
||||||
participant U as User
|
|
||||||
participant E as Executor
|
|
||||||
participant M as Memory Bank
|
|
||||||
participant L as LLM
|
|
||||||
participant T as Tools
|
|
||||||
participant S as Safety Shield
|
|
||||||
|
|
||||||
Note over U,S: Agent Turn Start
|
|
||||||
U->>S: 1. Submit Prompt
|
|
||||||
activate S
|
|
||||||
S->>E: Input Safety Check
|
|
||||||
deactivate S
|
|
||||||
|
|
||||||
loop Inference Loop
|
|
||||||
E->>L: 2.1 Augment with Context
|
|
||||||
L-->>E: 2.2 Response (with/without tool calls)
|
|
||||||
|
|
||||||
alt Has Tool Calls
|
|
||||||
E->>S: Check Tool Input
|
|
||||||
S->>T: 3.1 Execute Tool
|
|
||||||
T-->>E: 3.2 Tool Response
|
|
||||||
E->>L: 4.1 Tool Response
|
|
||||||
L-->>E: 4.2 Synthesized Response
|
|
||||||
end
|
|
||||||
|
|
||||||
opt Stop Conditions
|
|
||||||
Note over E: Break if:
|
|
||||||
Note over E: - No tool calls
|
|
||||||
Note over E: - Max iterations reached
|
|
||||||
Note over E: - Token limit exceeded
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
E->>S: Output Safety Check
|
|
||||||
S->>U: 5. Final Response
|
|
||||||
```
|
|
||||||
|
|
||||||
Each step in this process can be monitored and controlled through configurations.
|
|
||||||
|
|
||||||
### Agent Execution Loop Example
|
|
||||||
Here's an example that demonstrates monitoring the agent's execution:
|
|
||||||
|
|
||||||
```python
|
|
||||||
from llama_stack_client import LlamaStackClient, Agent, AgentEventLogger
|
|
||||||
from rich.pretty import pprint
|
|
||||||
|
|
||||||
# Replace host and port
|
|
||||||
client = LlamaStackClient(base_url=f"http://{HOST}:{PORT}")
|
|
||||||
|
|
||||||
agent = Agent(
|
|
||||||
client,
|
|
||||||
# Check with `llama-stack-client models list`
|
|
||||||
model="Llama3.2-3B-Instruct",
|
|
||||||
instructions="You are a helpful assistant",
|
|
||||||
# Enable both RAG and tool usage
|
|
||||||
tools=[
|
|
||||||
{
|
|
||||||
"name": "builtin::rag/knowledge_search",
|
|
||||||
"args": {"vector_db_ids": ["my_docs"]},
|
|
||||||
},
|
|
||||||
"builtin::code_interpreter",
|
|
||||||
],
|
|
||||||
# Configure safety (optional)
|
|
||||||
input_shields=["llama_guard"],
|
|
||||||
output_shields=["llama_guard"],
|
|
||||||
# Control the inference loop
|
|
||||||
max_infer_iters=5,
|
|
||||||
sampling_params={
|
|
||||||
"strategy": {"type": "top_p", "temperature": 0.7, "top_p": 0.95},
|
|
||||||
"max_tokens": 2048,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
session_id = agent.create_session("monitored_session")
|
|
||||||
|
|
||||||
# Stream the agent's execution steps
|
|
||||||
response = agent.create_turn(
|
|
||||||
messages=[{"role": "user", "content": "Analyze this code and run it"}],
|
|
||||||
documents=[
|
|
||||||
{
|
|
||||||
"content": "https://raw.githubusercontent.com/example/code.py",
|
|
||||||
"mime_type": "text/plain",
|
|
||||||
}
|
|
||||||
],
|
|
||||||
session_id=session_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Monitor each step of execution
|
|
||||||
for log in AgentEventLogger().log(response):
|
|
||||||
log.print()
|
|
||||||
|
|
||||||
# Using non-streaming API, the response contains input, steps, and output.
|
|
||||||
response = agent.create_turn(
|
|
||||||
messages=[{"role": "user", "content": "Analyze this code and run it"}],
|
|
||||||
documents=[
|
|
||||||
{
|
|
||||||
"content": "https://raw.githubusercontent.com/example/code.py",
|
|
||||||
"mime_type": "text/plain",
|
|
||||||
}
|
|
||||||
],
|
|
||||||
session_id=session_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
pprint(f"Input: {response.input_messages}")
|
|
||||||
pprint(f"Output: {response.output_message.content}")
|
|
||||||
pprint(f"Steps: {response.steps}")
|
|
||||||
```
|
|
||||||
|
|
@ -1,125 +0,0 @@
|
||||||
# Evaluations
|
|
||||||
|
|
||||||
Llama Stack provides a set of APIs for running evaluations of LLM applications:
|
|
||||||
- `/datasetio` + `/datasets` API
|
|
||||||
- `/scoring` + `/scoring_functions` API
|
|
||||||
- `/eval` + `/benchmarks` API
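
Once a Llama Stack server is running, you can check which resources are registered for each of these APIs. A quick sketch (client method names are assumed from the SDK; replace `HOST` and `PORT` with your server's address):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url=f"http://{HOST}:{PORT}")

print(client.datasets.list())
print(client.scoring_functions.list())
print(client.benchmarks.list())
```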
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
This guide walks you through the process of evaluating an LLM application built using Llama Stack. Check out the [Evaluation Reference](../references/evals_reference/index.md) guide, which goes over the set of APIs and developer experience flow for using Llama Stack to run evaluations for benchmark and application use cases. Check out our Colab notebook with working examples of evaluations [here](https://colab.research.google.com/drive/10CHyykee9j2OigaIcRv47BKG9mrNm0tJ?usp=sharing).
|
|
||||||
|
|
||||||
|
|
||||||
## Application Evaluation
|
|
||||||
|
|
||||||
[](https://colab.research.google.com/github/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)
|
|
||||||
|
|
||||||
Llama Stack offers a library of scoring functions and the `/scoring` API, allowing you to run evaluations on your pre-annotated AI application datasets.
|
|
||||||
|
|
||||||
In this example, we will show you how to:
|
|
||||||
1. Build an Agent with Llama Stack
|
|
||||||
2. Query the agent's sessions, turns, and steps
|
|
||||||
3. Evaluate the results.
|
|
||||||
|
|
||||||
##### Building a Search Agent
|
|
||||||
```python
|
|
||||||
from llama_stack_client import LlamaStackClient, Agent, AgentEventLogger
|
|
||||||
|
|
||||||
client = LlamaStackClient(base_url=f"http://{HOST}:{PORT}")
|
|
||||||
|
|
||||||
agent = Agent(
|
|
||||||
client,
|
|
||||||
model="meta-llama/Llama-3.3-70B-Instruct",
|
|
||||||
instructions="You are a helpful assistant. Use search tool to answer the questions. ",
|
|
||||||
tools=["builtin::websearch"],
|
|
||||||
)
|
|
||||||
user_prompts = [
|
|
||||||
"Which teams played in the NBA Western Conference Finals of 2024. Search the web for the answer.",
|
|
||||||
"In which episode and season of South Park does Bill Cosby (BSM-471) first appear? Give me the number and title. Search the web for the answer.",
|
|
||||||
"What is the British-American kickboxer Andrew Tate's kickboxing name? Search the web for the answer.",
|
|
||||||
]
|
|
||||||
|
|
||||||
session_id = agent.create_session("test-session")
|
|
||||||
|
|
||||||
for prompt in user_prompts:
|
|
||||||
response = agent.create_turn(
|
|
||||||
messages=[
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": prompt,
|
|
||||||
}
|
|
||||||
],
|
|
||||||
session_id=session_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
for log in AgentEventLogger().log(response):
|
|
||||||
log.print()
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
##### Query Agent Execution Steps
|
|
||||||
|
|
||||||
Now, let's look deeper into the agent's execution steps and see how well our agent performs.
|
|
||||||
```python
|
|
||||||
# query the agent's session
|
|
||||||
from rich.pretty import pprint
|
|
||||||
|
|
||||||
session_response = client.agents.session.retrieve(
|
|
||||||
session_id=session_id,
|
|
||||||
agent_id=agent.agent_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
pprint(session_response)
|
|
||||||
```
|
|
||||||
|
|
||||||
As a sanity check, we will first check whether each user prompt is followed by a tool call to `brave_search`.
|
|
||||||
```python
|
|
||||||
num_tool_call = 0
|
|
||||||
for turn in session_response.turns:
|
|
||||||
for step in turn.steps:
|
|
||||||
if (
|
|
||||||
step.step_type == "tool_execution"
|
|
||||||
and step.tool_calls[0].tool_name == "brave_search"
|
|
||||||
):
|
|
||||||
num_tool_call += 1
|
|
||||||
|
|
||||||
print(
|
|
||||||
f"{num_tool_call}/{len(session_response.turns)} user prompts are followed by a tool call to `brave_search`"
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
##### Evaluate Agent Responses
|
|
||||||
Now, we want to evaluate the agent's responses to the user prompts.
|
|
||||||
|
|
||||||
1. First, we will process the agent's execution history into a list of rows that can be used for evaluation.
|
|
||||||
2. Next, we will label the rows with the expected answer.
|
|
||||||
3. Finally, we will use the `/scoring` API to score the agent's responses.
|
|
||||||
|
|
||||||
```python
|
|
||||||
eval_rows = []
|
|
||||||
|
|
||||||
expected_answers = [
|
|
||||||
"Dallas Mavericks and the Minnesota Timberwolves",
|
|
||||||
"Season 4, Episode 12",
|
|
||||||
"King Cobra",
|
|
||||||
]
|
|
||||||
|
|
||||||
for i, turn in enumerate(session_response.turns):
|
|
||||||
eval_rows.append(
|
|
||||||
{
|
|
||||||
"input_query": turn.input_messages[0].content,
|
|
||||||
"generated_answer": turn.output_message.content,
|
|
||||||
"expected_answer": expected_answers[i],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
pprint(eval_rows)
|
|
||||||
|
|
||||||
scoring_params = {
|
|
||||||
"basic::subset_of": None,
|
|
||||||
}
|
|
||||||
scoring_response = client.scoring.score(
|
|
||||||
input_rows=eval_rows, scoring_functions=scoring_params
|
|
||||||
)
|
|
||||||
pprint(scoring_response)
|
|
||||||
```
|
|
||||||
|
|
@ -1,30 +0,0 @@
|
||||||
# Building AI Applications (Examples)
|
|
||||||
|
|
||||||
Llama Stack provides all the building blocks needed to create sophisticated AI applications.
|
|
||||||
|
|
||||||
The best way to get started is to look at this notebook which walks through the various APIs (from basic inference, to RAG agents) and how to use them.
|
|
||||||
|
|
||||||
**Notebook**: [Building AI Applications](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)
|
|
||||||
|
|
||||||
Here are some key topics that will help you build effective agents:
|
|
||||||
|
|
||||||
- **[RAG (Retrieval-Augmented Generation)](rag)**: Learn how to enhance your agents with external knowledge through retrieval mechanisms.
|
|
||||||
- **[Agent](agent)**: Understand the components and design patterns of the Llama Stack agent framework.
|
|
||||||
- **[Agent Execution Loop](agent_execution_loop)**: Understand how agents process information, make decisions, and execute actions in a continuous loop.
|
|
||||||
- **[Tools](tools)**: Extend your agents' capabilities by integrating with external tools and APIs.
|
|
||||||
- **[Evals](evals)**: Evaluate your agents' effectiveness and identify areas for improvement.
|
|
||||||
- **[Telemetry](telemetry)**: Monitor and analyze your agents' performance and behavior.
|
|
||||||
- **[Safety](safety)**: Implement guardrails and safety measures to ensure responsible AI behavior.
|
|
||||||
|
|
||||||
```{toctree}
|
|
||||||
:hidden:
|
|
||||||
:maxdepth: 1
|
|
||||||
|
|
||||||
rag
|
|
||||||
agent
|
|
||||||
agent_execution_loop
|
|
||||||
tools
|
|
||||||
evals
|
|
||||||
telemetry
|
|
||||||
safety
|
|
||||||
```
|
|
||||||
|
|
@ -1,259 +0,0 @@
|
||||||
## Retrieval Augmented Generation (RAG)
|
|
||||||
|
|
||||||
RAG enables your applications to reference and recall information from previous interactions or external documents.
|
|
||||||
|
|
||||||
Llama Stack organizes the APIs that enable RAG into three layers:
|
|
||||||
1. The lowermost APIs deal with raw storage and retrieval. These include Vector IO, KeyValue IO (coming soon), and Relational IO (also coming soon).
|
|
||||||
2. The next is the "RAG Tool", a first-class tool as part of the [Tools API](tools.md) that allows you to ingest documents (from URLs, files, etc.) with various chunking strategies and query them smartly.
|
|
||||||
3. Finally, it all comes together with the top-level ["Agents" API](agent.md) that allows you to create agents that can use the tools to answer questions, perform tasks, and more.
|
|
||||||
|
|
||||||
<img src="rag.png" alt="RAG System" width="50%">
|
|
||||||
|
|
||||||
The RAG system uses lower-level storage for different types of data:
|
|
||||||
* **Vector IO**: For semantic search and retrieval
|
|
||||||
* **Key-Value and Relational IO**: For structured data storage
|
|
||||||
|
|
||||||
We may add more storage types like Graph IO in the future.
|
|
||||||
|
|
||||||
### Setting up Vector DBs
|
|
||||||
|
|
||||||
For this guide, we will use [Ollama](https://ollama.com/) as the inference provider.
|
|
||||||
Ollama is an LLM runtime that allows you to run Llama models locally.
|
|
||||||
|
|
||||||
Here's how to set up a vector database for RAG:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Create http client
|
|
||||||
import os
|
|
||||||
from llama_stack_client import LlamaStackClient
|
|
||||||
|
|
||||||
client = LlamaStackClient(base_url=f"http://localhost:{os.environ['LLAMA_STACK_PORT']}")
|
|
||||||
|
|
||||||
|
|
||||||
# Register a vector db
|
|
||||||
vector_db_id = "my_documents"
|
|
||||||
response = client.vector_dbs.register(
|
|
||||||
vector_db_id=vector_db_id,
|
|
||||||
embedding_model="all-MiniLM-L6-v2",
|
|
||||||
embedding_dimension=384,
|
|
||||||
provider_id="faiss",
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Ingesting Documents
|
|
||||||
You can ingest documents into the vector database using two methods: directly inserting pre-chunked
|
|
||||||
documents or using the RAG Tool.
|
|
||||||
```python
|
|
||||||
# You can insert a pre-chunked document directly into the vector db
|
|
||||||
chunks = [
|
|
||||||
{
|
|
||||||
"content": "Your document text here",
|
|
||||||
"mime_type": "text/plain",
|
|
||||||
"metadata": {
|
|
||||||
"document_id": "doc1",
|
|
||||||
"author": "Jane Doe",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
]
|
|
||||||
client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks)
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Using Precomputed Embeddings
|
|
||||||
If you decide to precompute embeddings for your documents, you can insert them directly into the vector database by
|
|
||||||
including the embedding vectors in the chunk data. This is useful if you have a separate embedding service or if you
|
|
||||||
want to customize the ingestion process.
|
|
||||||
```python
|
|
||||||
chunks_with_embeddings = [
|
|
||||||
{
|
|
||||||
"content": "First chunk of text",
|
|
||||||
"mime_type": "text/plain",
|
|
||||||
"embedding": [0.1, 0.2, 0.3, ...], # Your precomputed embedding vector
|
|
||||||
"metadata": {"document_id": "doc1", "section": "introduction"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"content": "Second chunk of text",
|
|
||||||
"mime_type": "text/plain",
|
|
||||||
"embedding": [0.2, 0.3, 0.4, ...], # Your precomputed embedding vector
|
|
||||||
"metadata": {"document_id": "doc1", "section": "methodology"},
|
|
||||||
},
|
|
||||||
]
|
|
||||||
client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks_with_embeddings)
|
|
||||||
```
|
|
||||||
When providing precomputed embeddings, ensure the embedding dimension matches the embedding_dimension specified when
|
|
||||||
registering the vector database.
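
For example, with the registration above (`embedding_dimension=384`) you can guard against a mismatch before inserting. A simple sketch:

```python
expected_dim = 384  # must match the embedding_dimension used at registration
for chunk in chunks_with_embeddings:
    assert len(chunk["embedding"]) == expected_dim, (
        f"chunk from {chunk['metadata']['document_id']} has dimension "
        f"{len(chunk['embedding'])}, expected {expected_dim}"
    )
client.vector_io.insert(vector_db_id=vector_db_id, chunks=chunks_with_embeddings)
```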
|
|
||||||
|
|
||||||
### Retrieval
|
|
||||||
You can query the vector database to retrieve documents based on their embeddings.
|
|
||||||
```python
|
|
||||||
# You can then query for these chunks
|
|
||||||
chunks_response = client.vector_io.query(
|
|
||||||
vector_db_id=vector_db_id, query="What do you know about..."
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Using the RAG Tool
|
|
||||||
|
|
||||||
A better way to ingest documents is to use the RAG Tool. This tool allows you to ingest documents from URLs, files, etc.
|
|
||||||
and automatically chunks them into smaller pieces. More examples of how to format a RAGDocument can be found in the
|
|
||||||
[appendix](#more-ragdocument-examples).
|
|
||||||
|
|
||||||
```python
|
|
||||||
from llama_stack_client import RAGDocument
|
|
||||||
|
|
||||||
urls = ["memory_optimizations.rst", "chat.rst", "llama3.rst"]
|
|
||||||
documents = [
|
|
||||||
RAGDocument(
|
|
||||||
document_id=f"num-{i}",
|
|
||||||
content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}",
|
|
||||||
mime_type="text/plain",
|
|
||||||
metadata={},
|
|
||||||
)
|
|
||||||
for i, url in enumerate(urls)
|
|
||||||
]
|
|
||||||
|
|
||||||
client.tool_runtime.rag_tool.insert(
|
|
||||||
documents=documents,
|
|
||||||
vector_db_id=vector_db_id,
|
|
||||||
chunk_size_in_tokens=512,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Query documents
|
|
||||||
results = client.tool_runtime.rag_tool.query(
|
|
||||||
vector_db_ids=[vector_db_id],
|
|
||||||
content="What do you know about...",
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
You can configure how the RAG tool adds metadata to the context if you find it useful for your application. Simply add:
|
|
||||||
```python
|
|
||||||
# Query documents
|
|
||||||
results = client.tool_runtime.rag_tool.query(
|
|
||||||
vector_db_ids=[vector_db_id],
|
|
||||||
content="What do you know about...",
|
|
||||||
query_config={
|
|
||||||
"chunk_template": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
```
|
|
||||||
### Building RAG-Enhanced Agents
|
|
||||||
|
|
||||||
One of the most powerful patterns is combining agents with RAG capabilities. Here's a complete example:
|
|
||||||
|
|
||||||
```python
|
|
||||||
from llama_stack_client import Agent
|
|
||||||
|
|
||||||
# Create agent with memory
|
|
||||||
agent = Agent(
|
|
||||||
client,
|
|
||||||
model="meta-llama/Llama-3.3-70B-Instruct",
|
|
||||||
instructions="You are a helpful assistant",
|
|
||||||
tools=[
|
|
||||||
{
|
|
||||||
"name": "builtin::rag/knowledge_search",
|
|
||||||
"args": {
|
|
||||||
"vector_db_ids": [vector_db_id],
|
|
||||||
# Defaults
|
|
||||||
"query_config": {
|
|
||||||
"chunk_size_in_tokens": 512,
|
|
||||||
"chunk_overlap_in_tokens": 0,
|
|
||||||
"chunk_template": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
],
|
|
||||||
)
|
|
||||||
session_id = agent.create_session("rag_session")
|
|
||||||
|
|
||||||
|
|
||||||
# Ask questions about documents in the vector db, and the agent will query the db to answer the question.
|
|
||||||
response = agent.create_turn(
|
|
||||||
messages=[{"role": "user", "content": "How to optimize memory in PyTorch?"}],
|
|
||||||
session_id=session_id,
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
> **NOTE:** The `instructions` field in the `AgentConfig` can be used to guide the agent's behavior. It is important to experiment with different instructions to see what works best for your use case.
|
|
||||||
|
|
||||||
|
|
||||||
You can also pass documents along with the user's message and ask questions about them.
|
|
||||||
```python
|
|
||||||
# Initial document ingestion
|
|
||||||
response = agent.create_turn(
|
|
||||||
messages=[
|
|
||||||
{"role": "user", "content": "I am providing some documents for reference."}
|
|
||||||
],
|
|
||||||
documents=[
|
|
||||||
{
|
|
||||||
"content": "https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/memory_optimizations.rst",
|
|
||||||
"mime_type": "text/plain",
|
|
||||||
}
|
|
||||||
],
|
|
||||||
session_id=session_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Query with RAG
|
|
||||||
response = agent.create_turn(
|
|
||||||
messages=[{"role": "user", "content": "What are the key topics in the documents?"}],
|
|
||||||
session_id=session_id,
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
You can print the response as shown below.
|
|
||||||
```python
|
|
||||||
from llama_stack_client import AgentEventLogger
|
|
||||||
|
|
||||||
for log in AgentEventLogger().log(response):
|
|
||||||
log.print()
|
|
||||||
```
|
|
||||||
|
|
||||||
### Unregistering Vector DBs
|
|
||||||
|
|
||||||
If you need to clean up and unregister vector databases, you can do so as follows:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Unregister a specified vector database
|
|
||||||
vector_db_id = "my_vector_db_id"
|
|
||||||
print(f"Unregistering vector database: {vector_db_id}")
|
|
||||||
client.vector_dbs.unregister(vector_db_id=vector_db_id)
|
|
||||||
|
|
||||||
|
|
||||||
# Unregister all vector databases
|
|
||||||
for vector_db_id in client.vector_dbs.list():
|
|
||||||
print(f"Unregistering vector database: {vector_db_id.identifier}")
|
|
||||||
client.vector_dbs.unregister(vector_db_id=vector_db_id.identifier)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Appendix
|
|
||||||
|
|
||||||
#### More RAGDocument Examples
|
|
||||||
```python
|
|
||||||
from llama_stack_client import RAGDocument
|
|
||||||
import base64

import requests
|
|
||||||
|
|
||||||
RAGDocument(document_id="num-0", content={"uri": "file://path/to/file"})
|
|
||||||
RAGDocument(document_id="num-1", content="plain text")
|
|
||||||
RAGDocument(
|
|
||||||
document_id="num-2",
|
|
||||||
content={
|
|
||||||
"type": "text",
|
|
||||||
"text": "plain text input",
|
|
||||||
}, # for inputs that should be treated as text explicitly
|
|
||||||
)
|
|
||||||
RAGDocument(
|
|
||||||
document_id="num-3",
|
|
||||||
content={
|
|
||||||
"type": "image",
|
|
||||||
"image": {"url": {"uri": "https://mywebsite.com/image.jpg"}},
|
|
||||||
},
|
|
||||||
)
|
|
||||||
B64_ENCODED_IMAGE = base64.b64encode(
|
|
||||||
requests.get(
|
|
||||||
"https://raw.githubusercontent.com/meta-llama/llama-stack/refs/heads/main/docs/_static/llama-stack.png"
|
|
||||||
).content
|
|
||||||
)
|
|
||||||
RAGDocument(
|
|
||||||
document_id="num-4",
|
|
||||||
content={"type": "image", "image": {"data": B64_ENCODED_IMAGE}},
|
|
||||||
)
|
|
||||||
```
|
|
||||||
For more strongly typed interaction, use the typed dicts found [here](https://github.com/meta-llama/llama-stack-client-python/blob/38cd91c9e396f2be0bec1ee96a19771582ba6f17/src/llama_stack_client/types/shared_params/document.py).
|
|
||||||
|
Before Width: | Height: | Size: 145 KiB |
|
|
@ -1,17 +0,0 @@
|
||||||
## Safety Guardrails
|
|
||||||
|
|
||||||
Safety is a critical component of any AI application. Llama Stack provides a Shield system that can be applied at multiple touchpoints:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Register a safety shield
|
|
||||||
shield_id = "content_safety"
|
|
||||||
client.shields.register(shield_id=shield_id, provider_shield_id="llama-guard-basic")
|
|
||||||
|
|
||||||
# Run content through shield
|
|
||||||
response = client.safety.run_shield(
|
|
||||||
shield_id=shield_id, messages=[{"role": "user", "content": "User message here"}]
|
|
||||||
)
|
|
||||||
|
|
||||||
if response.violation:
|
|
||||||
print(f"Safety violation detected: {response.violation.user_message}")
|
|
||||||
```
|
|
||||||
|
|
@ -1,71 +0,0 @@
|
||||||
## Telemetry
|
|
||||||
|
|
||||||
The Llama Stack telemetry system provides comprehensive tracing, metrics, and logging capabilities. It supports multiple sink types including OpenTelemetry, SQLite, and Console output.
|
|
||||||
|
|
||||||
### Events
|
|
||||||
The telemetry system supports three main types of events:
|
|
||||||
|
|
||||||
- **Unstructured Log Events**: Free-form log messages with severity levels
|
|
||||||
```python
|
|
||||||
unstructured_log_event = UnstructuredLogEvent(
|
|
||||||
message="This is a log message", severity=LogSeverity.INFO
|
|
||||||
)
|
|
||||||
```
|
|
||||||
- **Metric Events**: Numerical measurements with units
|
|
||||||
```python
|
|
||||||
metric_event = MetricEvent(metric="my_metric", value=10, unit="count")
|
|
||||||
```
|
|
||||||
- **Structured Log Events**: System events like span start/end. Extensible to add more structured log types.
|
|
||||||
```python
|
|
||||||
structured_log_event = SpanStartPayload(name="my_span", parent_span_id="parent_span_id")
|
|
||||||
```
|
|
||||||
|
|
||||||
### Spans and Traces
|
|
||||||
- **Spans**: Represent operations with timing and hierarchical relationships
|
|
||||||
- **Traces**: Collection of related spans forming a complete request flow
|
|
||||||
|
|
||||||
### Sinks
|
|
||||||
- **OpenTelemetry**: Send events to an OpenTelemetry Collector. This is useful for visualizing traces in a tool like Jaeger.
|
|
||||||
- **SQLite**: Store events in a local SQLite database. This is needed if you want to query the events later through the Llama Stack API.
|
|
||||||
- **Console**: Print events to the console.
|
|
||||||
|
|
||||||
### Providers
|
|
||||||
|
|
||||||
#### Meta-Reference Provider
|
|
||||||
Currently, only the meta-reference provider is implemented. It can be configured to send events to three sink types:
|
|
||||||
1) OpenTelemetry Collector
|
|
||||||
2) SQLite
|
|
||||||
3) Console
|
|
||||||
|
|
||||||
#### Configuration
|
|
||||||
|
|
||||||
Here's an example that sends telemetry signals to all three sink types. Your configuration might use only one.
|
|
||||||
```yaml
|
|
||||||
telemetry:
|
|
||||||
- provider_id: meta-reference
|
|
||||||
provider_type: inline::meta-reference
|
|
||||||
config:
|
|
||||||
sinks: ['console', 'sqlite', 'otel_trace', 'otel_metric']
|
|
||||||
otel_trace_endpoint: "http://localhost:4318/v1/traces"
|
|
||||||
otel_metric_endpoint: "http://localhost:4318/v1/metrics"
|
|
||||||
sqlite_db_path: "/path/to/telemetry.db"
|
|
||||||
```
|
|
||||||
|
|
||||||
### Jaeger to visualize traces
|
|
||||||
|
|
||||||
The `otel` sink works with any service compatible with the OpenTelemetry Collector; traces and metrics use two separate endpoints.
|
|
||||||
Let's use Jaeger to visualize this data.
|
|
||||||
|
|
||||||
Start a Jaeger instance with the OTLP HTTP endpoint at 4318 and the Jaeger UI at 16686 using the following command:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
$ docker run --pull always --rm --name jaeger \
|
|
||||||
-p 16686:16686 -p 4318:4318 \
|
|
||||||
jaegertracing/jaeger:2.1.0
|
|
||||||
```
|
|
||||||
|
|
||||||
Once the Jaeger instance is running, you can visualize traces by navigating to http://localhost:16686/.
|
|
||||||
|
|
||||||
### Querying Traces Stored in SQLite
|
|
||||||
|
|
||||||
The `sqlite` sink allows you to query traces without an external system. Here are some example queries. Refer to the notebook at [Llama Stack Building AI Applications](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb) for more examples on how to query traces and spans.
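For example, a minimal sketch of pulling recent traces back through the client (assuming the `telemetry.query_traces` endpoint of the Llama Stack API):

```python
# Sketch: list recently recorded traces and print their identifiers.
traces = client.telemetry.query_traces(limit=10)
for trace in traces:
    print(trace.trace_id, trace.start_time)
```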
|
|
||||||
|
|
@ -1,262 +0,0 @@
|
||||||
# Tools
|
|
||||||
|
|
||||||
Tools are functions that can be invoked by an agent to perform tasks. They are organized into tool groups and registered with specific providers. Each tool group represents a collection of related tools from a single provider. Tools are grouped so that state can be externalized: the tools in a group typically operate on the same shared state.
|
|
||||||
For example, a "db_access" tool group could contain tools for interacting with a database, such as "list_tables", "query_table", and "insert_row".
|
|
||||||
|
|
||||||
Tools are treated like any other resource in Llama Stack, such as models: you can register them, provide them through providers, and so on.
|
|
||||||
|
|
||||||
When instantiating an agent, you can provide it a list of tool groups that it has access to. The agent gets the corresponding tool definitions for the specified tool groups and passes them along to the model.
|
|
||||||
|
|
||||||
Refer to the [Building AI Applications](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb) notebook for more examples on how to use tools.
|
|
||||||
|
|
||||||
## Server-side vs. client-side tool execution
|
|
||||||
|
|
||||||
Llama Stack allows you to use both server-side and client-side tools. With server-side tools, `agent.create_turn` can transparently execute the tool calls emitted by the model,
|
|
||||||
giving the user the final answer. If client-side tools are provided, the tool call is sent back to the user for execution
|
|
||||||
and optional continuation using the `agent.resume_turn` method.
|
|
||||||
|
|
||||||
|
|
||||||
### Server-side tools
|
|
||||||
|
|
||||||
Llama Stack provides built-in providers for some common tools. These include web search, math, and RAG capabilities.
|
|
||||||
|
|
||||||
#### Web Search
|
|
||||||
|
|
||||||
You have three providers to execute the web search tool calls generated by a model: Brave Search, Bing Search, and Tavily Search.
|
|
||||||
|
|
||||||
To indicate that the web search tool calls should be executed by brave-search, you can point the "builtin::websearch" toolgroup to the "brave-search" provider.
|
|
||||||
|
|
||||||
```python
|
|
||||||
client.toolgroups.register(
|
|
||||||
toolgroup_id="builtin::websearch",
|
|
||||||
provider_id="brave-search",
|
|
||||||
args={"max_results": 5},
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
The tool requires an API key which can be provided either in the configuration or through the request header `X-LlamaStack-Provider-Data`. The format of the header is:
|
|
||||||
```
|
|
||||||
{"<provider_name>_api_key": <your api key>}
|
|
||||||
```
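For example, with the Python client you can pass the key as provider data, which is sent to the server in that header. This is a minimal sketch; the exact key name (`brave_search_api_key` below) is an assumption that depends on the provider you registered:

```python
from llama_stack_client import LlamaStackClient

# Sketch: provider_data is serialized into the X-LlamaStack-Provider-Data header
# on each request; the key name is assumed for the brave-search provider.
client = LlamaStackClient(
    base_url="http://localhost:8321",
    provider_data={"brave_search_api_key": "YOUR_BRAVE_SEARCH_API_KEY"},
)
```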
|
|
||||||
|
|
||||||
|
|
||||||
#### Math
|
|
||||||
|
|
||||||
The WolframAlpha tool provides access to computational knowledge through the WolframAlpha API.
|
|
||||||
|
|
||||||
```python
|
|
||||||
client.toolgroups.register(
|
|
||||||
toolgroup_id="builtin::wolfram_alpha", provider_id="wolfram-alpha"
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
Example usage:
|
|
||||||
```python
|
|
||||||
result = client.tool_runtime.invoke_tool(
|
|
||||||
tool_name="wolfram_alpha", args={"query": "solve x^2 + 2x + 1 = 0"}
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
#### RAG
|
|
||||||
|
|
||||||
The RAG tool enables retrieval of context from various types of memory banks (vector, key-value, keyword, and graph).
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Register Memory tool group
|
|
||||||
client.toolgroups.register(
|
|
||||||
toolgroup_id="builtin::rag",
|
|
||||||
provider_id="faiss",
|
|
||||||
args={"max_chunks": 5, "max_tokens_in_context": 4096},
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
Features:
|
|
||||||
- Support for multiple memory bank types
|
|
||||||
- Configurable query generation
|
|
||||||
- Context retrieval with token limits
|
|
||||||
|
|
||||||
|
|
||||||
> **Note:** By default, the Llama Stack `run.yaml` defines toolgroups for web search, WolframAlpha, and RAG, provided by the tavily-search, wolfram-alpha, and rag providers respectively.
|
|
||||||
|
|
||||||
## Model Context Protocol (MCP)
|
|
||||||
|
|
||||||
[MCP](https://github.com/modelcontextprotocol) is an emerging, popular standard for tool discovery and execution. It is a protocol that allows tools to be dynamically discovered
|
|
||||||
from an MCP endpoint and can be used to extend the agent's capabilities.
|
|
||||||
|
|
||||||
|
|
||||||
### Using Remote MCP Servers
|
|
||||||
|
|
||||||
You can find some popular remote MCP servers [here](https://github.com/jaw9c/awesome-remote-mcp-servers). You can register them as toolgroups in the same way as local providers.
|
|
||||||
|
|
||||||
```python
|
|
||||||
client.toolgroups.register(
|
|
||||||
toolgroup_id="mcp::deepwiki",
|
|
||||||
provider_id="model-context-protocol",
|
|
||||||
mcp_endpoint=URL(uri="https://mcp.deepwiki.com/sse"),
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
Note that most of the more useful MCP servers need you to authenticate with them. Many of them use OAuth2.0 for authentication. You can provide authorization headers to send to the MCP server
|
|
||||||
using the "Provider Data" abstraction provided by Llama Stack. When making an agent call,
|
|
||||||
|
|
||||||
```python
|
|
||||||
agent = Agent(
|
|
||||||
...,
|
|
||||||
tools=["mcp::deepwiki"],
|
|
||||||
extra_headers={
|
|
||||||
"X-LlamaStack-Provider-Data": json.dumps(
|
|
||||||
{
|
|
||||||
"mcp_headers": {
|
|
||||||
"http://mcp.deepwiki.com/sse": {
|
|
||||||
"Authorization": "Bearer <your_access_token>",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
),
|
|
||||||
},
|
|
||||||
)
|
|
||||||
agent.create_turn(...)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Running your own MCP server
|
|
||||||
|
|
||||||
Here's an example of how to run a simple MCP server that exposes a File System as a set of tools to the Llama Stack agent.
|
|
||||||
|
|
||||||
```shell
|
|
||||||
# start your MCP server
|
|
||||||
mkdir /tmp/content
|
|
||||||
touch /tmp/content/foo
|
|
||||||
touch /tmp/content/bar
|
|
||||||
npx -y supergateway --port 8000 --stdio 'npx -y @modelcontextprotocol/server-filesystem /tmp/content'
|
|
||||||
```
|
|
||||||
|
|
||||||
Then register the MCP server as a tool group,
|
|
||||||
```python
|
|
||||||
client.toolgroups.register(
|
|
||||||
toolgroup_id="mcp::filesystem",
|
|
||||||
provider_id="model-context-protocol",
|
|
||||||
mcp_endpoint=URL(uri="http://localhost:8000/sse"),
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Adding Custom (Client-side) Tools
|
|
||||||
|
|
||||||
When you want to use tools other than the built-in ones, you just need to implement a Python function with a docstring. The content of the docstring is used to describe the tool and its parameters, and is passed
|
|
||||||
along to the generative model.
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Example tool definition
|
|
||||||
def my_tool(input: int) -> int:
|
|
||||||
"""
|
|
||||||
Runs my awesome tool.
|
|
||||||
|
|
||||||
:param input: some int parameter
|
|
||||||
"""
|
|
||||||
return input * 2
|
|
||||||
```
|
|
||||||
> **NOTE:** We employ python docstrings to describe the tool and the parameters. It is important to document the tool and the parameters so that the model can use the tool correctly. It is recommended to experiment with different docstrings to see how they affect the model's behavior.
|
|
||||||
|
|
||||||
Once defined, simply pass the tool to the agent config. `Agent` will take care of the rest (calling the model with the tool definition, executing the tool, and returning the result to the model for the next iteration).
|
|
||||||
```python
|
|
||||||
# Example agent config with client provided tools
|
|
||||||
agent = Agent(client, ..., tools=[my_tool])
|
|
||||||
```
|
|
||||||
|
|
||||||
Refer to [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/blob/main/examples/agents/e2e_loop_with_client_tools.py) for an example of how to use client provided tools.
|
|
||||||
|
|
||||||
|
|
||||||
## Tool Invocation
|
|
||||||
|
|
||||||
Tools can be invoked using the `invoke_tool` method:
|
|
||||||
|
|
||||||
```python
|
|
||||||
result = client.tool_runtime.invoke_tool(
|
|
||||||
tool_name="web_search", kwargs={"query": "What is the capital of France?"}
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
The result contains the following fields; a short usage sketch follows the list:
|
|
||||||
- `content`: The tool's output
|
|
||||||
- `error_message`: Optional error message if the tool failed
|
|
||||||
- `error_code`: Optional error code if the tool failed
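A short sketch of inspecting these fields (the query string is purely illustrative):

```python
# Sketch: check for an error before using the tool output.
result = client.tool_runtime.invoke_tool(
    tool_name="wolfram_alpha", kwargs={"query": "integrate x^2"}
)
if result.error_message:
    print(f"Tool failed ({result.error_code}): {result.error_message}")
else:
    print(result.content)
```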
|
|
||||||
|
|
||||||
## Listing Available Tools
|
|
||||||
|
|
||||||
You can list all available tools or filter by tool group:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# List all tools
|
|
||||||
all_tools = client.tools.list_tools()
|
|
||||||
|
|
||||||
# List tools in a specific group
|
|
||||||
group_tools = client.tools.list_tools(toolgroup_id="search_tools")
|
|
||||||
```
|
|
||||||
|
|
||||||
## Simple Example 2: Using an Agent with the Web Search Tool
|
|
||||||
1. Start by registering for a Tavily API key at [Tavily](https://tavily.com/).
|
|
||||||
2. [Optional] Provide the API key directly to the Llama Stack server by exporting it and passing it with the `--env` flag when starting the server:
|
|
||||||
```bash
|
|
||||||
export TAVILY_SEARCH_API_KEY="your key"
|
|
||||||
```
|
|
||||||
```bash
|
|
||||||
--env TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY}
|
|
||||||
```
|
|
||||||
3. Run the following script.
|
|
||||||
```python
|
|
||||||
from llama_stack_client.lib.agents.agent import Agent
|
|
||||||
from llama_stack_client.types.agent_create_params import AgentConfig
|
|
||||||
from llama_stack_client.lib.agents.event_logger import EventLogger
|
|
||||||
from llama_stack_client import LlamaStackClient
|
|
||||||
|
|
||||||
client = LlamaStackClient(
|
|
||||||
base_url=f"http://localhost:8321",
|
|
||||||
provider_data={
|
|
||||||
"tavily_search_api_key": "your_TAVILY_SEARCH_API_KEY"
|
|
||||||
}, # Set this from the client side. No need to provide it if it has already been configured on the Llama Stack server.
|
|
||||||
)
|
|
||||||
|
|
||||||
agent = Agent(
|
|
||||||
client,
|
|
||||||
model="meta-llama/Llama-3.2-3B-Instruct",
|
|
||||||
instructions=(
|
|
||||||
"You are a web search assistant, must use websearch tool to look up the most current and precise information available. "
|
|
||||||
),
|
|
||||||
tools=["builtin::websearch"],
|
|
||||||
)
|
|
||||||
|
|
||||||
session_id = agent.create_session("websearch-session")
|
|
||||||
|
|
||||||
response = agent.create_turn(
|
|
||||||
messages=[
|
|
||||||
{"role": "user", "content": "How did the USA perform in the last Olympics?"}
|
|
||||||
],
|
|
||||||
session_id=session_id,
|
|
||||||
)
|
|
||||||
for log in EventLogger().log(response):
|
|
||||||
log.print()
|
|
||||||
```
|
|
||||||
|
|
||||||
## Simple Example 3: Using an Agent with the WolframAlpha Tool
|
|
||||||
1. Start by registering for a WolframAlpha API key at [WolframAlpha Developer Portal](https://developer.wolframalpha.com/access).
|
|
||||||
2. Provide the API key either when starting the Llama Stack server:
|
|
||||||
```bash
|
|
||||||
--env WOLFRAM_ALPHA_API_KEY=${WOLFRAM_ALPHA_API_KEY}
|
|
||||||
```
|
|
||||||
or from the client side:
|
|
||||||
```python
|
|
||||||
client = LlamaStackClient(
|
|
||||||
base_url="http://localhost:8321",
|
|
||||||
provider_data={"wolfram_alpha_api_key": wolfram_api_key},
|
|
||||||
)
|
|
||||||
```
|
|
||||||
3. Configure the tools in the Agent by setting `tools=["builtin::wolfram_alpha"]`.
|
|
||||||
4. Example user query:
|
|
||||||
```python
|
|
||||||
response = agent.create_turn(
|
|
||||||
messages=[{"role": "user", "content": "Solve x^2 + 2x + 1 = 0 using WolframAlpha"}],
|
|
||||||
session_id=session_id,
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
@ -1,12 +0,0 @@
|
||||||
## API Providers
|
|
||||||
|
|
||||||
The goal of Llama Stack is to build an ecosystem where users can easily swap out different implementations for the same API. Examples for these include:
|
|
||||||
- LLM inference providers (e.g., Fireworks, Together, AWS Bedrock, Groq, Cerebras, SambaNova, vLLM, etc.),
|
|
||||||
- Vector databases (e.g., ChromaDB, Weaviate, Qdrant, Milvus, FAISS, PGVector, etc.),
|
|
||||||
- Safety providers (e.g., Meta's Llama Guard, AWS Bedrock Guardrails, etc.)
|
|
||||||
|
|
||||||
Providers come in two flavors:
|
|
||||||
- **Remote**: the provider runs as a separate service external to the Llama Stack codebase. Llama Stack contains a small amount of adapter code.
|
|
||||||
- **Inline**: the provider is fully specified and implemented within the Llama Stack codebase. It may be a simple wrapper around an existing library, or a full fledged implementation within Llama Stack.
|
|
||||||
|
|
||||||
Most importantly, Llama Stack always strives to provide at least one fully inline provider for each API so you can iterate on a fully featured environment locally.
|
|
||||||
|
|
@ -1,18 +0,0 @@
|
||||||
## APIs
|
|
||||||
|
|
||||||
A Llama Stack API is described as a collection of REST endpoints. We currently support the following APIs:
|
|
||||||
|
|
||||||
- **Inference**: run inference with a LLM
|
|
||||||
- **Safety**: apply safety policies to the output at a systems (not just model) level
|
|
||||||
- **Agents**: run multi-step agentic workflows with LLMs with tool usage, memory (RAG), etc.
|
|
||||||
- **DatasetIO**: interface with datasets and data loaders
|
|
||||||
- **Scoring**: evaluate outputs of the system
|
|
||||||
- **Eval**: generate outputs (via Inference or Agents) and perform scoring
|
|
||||||
- **VectorIO**: perform operations on vector stores, such as adding documents, searching, and deleting documents
|
|
||||||
- **Telemetry**: collect telemetry data from the system
|
|
||||||
|
|
||||||
We are working on adding a few more APIs to complete the application lifecycle. These will include:
|
|
||||||
- **Batch Inference**: run inference on a dataset of inputs
|
|
||||||
- **Batch Agents**: run agents on a dataset of inputs
|
|
||||||
- **Post Training**: fine-tune a Llama model
|
|
||||||
- **Synthetic Data Generation**: generate synthetic data for model development
|
|
||||||
|
|
@ -1,9 +0,0 @@
|
||||||
## Distributions
|
|
||||||
|
|
||||||
While there is a lot of flexibility to mix-and-match providers, users will often work with a specific set of providers (hardware support, contractual obligations, etc.). We therefore need to provide a _convenient shorthand_ for such collections. We call this shorthand a **Llama Stack Distribution** or a **Distro**. One can think of it as a specific pre-packaged version of the Llama Stack. Here are some examples:
|
|
||||||
|
|
||||||
**Remotely Hosted Distro**: These are the simplest to consume from a user perspective. You can simply obtain the API key for these providers, point to a URL and have _all_ Llama Stack APIs working out of the box. Currently, [Fireworks](https://fireworks.ai/) and [Together](https://together.xyz/) provide such easy-to-consume Llama Stack distributions.
|
|
||||||
|
|
||||||
**Locally Hosted Distro**: You may want to run Llama Stack on your own hardware. Typically though, you still need to use Inference via an external service. You can use providers like HuggingFace TGI, Fireworks, Together, etc. for this purpose. Or you may have access to GPUs and can run a [vLLM](https://github.com/vllm-project/vllm) or [NVIDIA NIM](https://build.nvidia.com/nim?filters=nimType%3Anim_type_run_anywhere&q=llama) instance. If you "just" have a regular desktop machine, you can use [Ollama](https://ollama.com/) for inference. To provide convenient quick access to these options, we provide a number of such pre-configured locally-hosted Distros.
|
|
||||||
|
|
||||||
**On-device Distro**: To run Llama Stack directly on an edge device (mobile phone or a tablet), we provide Distros for [iOS](https://llama-stack.readthedocs.io/en/latest/distributions/ondevice_distro/ios_sdk.html) and [Android](https://llama-stack.readthedocs.io/en/latest/distributions/ondevice_distro/android_sdk.html)
|
|
||||||
|
|
@ -1,77 +0,0 @@
|
||||||
## Evaluation Concepts
|
|
||||||
|
|
||||||
The Llama Stack Evaluation flow allows you to run evaluations on your GenAI application datasets or pre-registered benchmarks.
|
|
||||||
|
|
||||||
We introduce a set of APIs in Llama Stack for supporting running evaluations of LLM applications.
|
|
||||||
- `/datasetio` + `/datasets` API
|
|
||||||
- `/scoring` + `/scoring_functions` API
|
|
||||||
- `/eval` + `/benchmarks` API
|
|
||||||
|
|
||||||
This guide goes over the sets of APIs and developer experience flow of using Llama Stack to run evaluations for different use cases. Checkout our Colab notebook on working examples with evaluations [here](https://colab.research.google.com/drive/10CHyykee9j2OigaIcRv47BKG9mrNm0tJ?usp=sharing).
|
|
||||||
|
|
||||||
|
|
||||||
The Evaluation APIs are associated with a set of Resources. Please visit the Resources section in our [Core Concepts](../concepts/index.md) guide for a better high-level understanding.
|
|
||||||
|
|
||||||
- **DatasetIO**: defines interface with datasets and data loaders.
|
|
||||||
- Associated with `Dataset` resource.
|
|
||||||
- **Scoring**: evaluate outputs of the system.
|
|
||||||
- Associated with `ScoringFunction` resource. We provide a suite of out-of-the-box scoring functions and also the ability to add custom evaluators. These scoring functions are the core part of defining an evaluation task that outputs evaluation metrics.
|
|
||||||
- **Eval**: generate outputs (via Inference or Agents) and perform scoring.
|
|
||||||
- Associated with `Benchmark` resource.
|
|
||||||
|
|
||||||
|
|
||||||
### Open-benchmark Eval
|
|
||||||
|
|
||||||
#### List of open-benchmarks Llama Stack support
|
|
||||||
|
|
||||||
Llama Stack pre-registers several popular open-benchmarks so you can easily evaluate model performance via the CLI.
|
|
||||||
|
|
||||||
The list of open-benchmarks we currently support:
|
|
||||||
- [MMLU-COT](https://arxiv.org/abs/2009.03300) (Measuring Massive Multitask Language Understanding): Benchmark designed to comprehensively evaluate the breadth and depth of a model's academic and professional understanding
|
|
||||||
- [GPQA-COT](https://arxiv.org/abs/2311.12022) (A Graduate-Level Google-Proof Q&A Benchmark): A challenging benchmark of 448 multiple-choice questions written by domain experts in biology, physics, and chemistry.
|
|
||||||
- [SimpleQA](https://openai.com/index/introducing-simpleqa/): Benchmark designed to assess a model's ability to answer short, fact-seeking questions.
|
|
||||||
- [MMMU](https://arxiv.org/abs/2311.16502) (A Massive Multi-discipline Multimodal Understanding and Reasoning Benchmark for Expert AGI): Benchmark designed to evaluate multimodal models.
|
|
||||||
|
|
||||||
|
|
||||||
You can follow this [contributing guide](https://llama-stack.readthedocs.io/en/latest/references/evals_reference/index.html#open-benchmark-contributing-guide) to add more open-benchmarks to Llama Stack.
|
|
||||||
|
|
||||||
#### Run evaluation on open-benchmarks via CLI
|
|
||||||
|
|
||||||
We have built-in functionality to run the supported open-benchmarks using the llama-stack-client CLI.
|
|
||||||
|
|
||||||
#### Spin up Llama Stack server
|
|
||||||
|
|
||||||
Spin up the Llama Stack server with the 'open-benchmark' template:
|
|
||||||
```
|
|
||||||
llama stack run llama_stack/templates/open-benchmark/run.yaml
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Run eval CLI
|
|
||||||
There are three necessary inputs to run a benchmark eval:
|
|
||||||
- `list of benchmark_ids`: The list of benchmark ids to run evaluation on
|
|
||||||
- `model-id`: The model id to evaluate on
|
|
||||||
- `output_dir`: Path to store the evaluation results
|
|
||||||
```
|
|
||||||
llama-stack-client eval run-benchmark <benchmark_id_1> <benchmark_id_2> ... \
|
|
||||||
--model_id <model id to evaluate on> \
|
|
||||||
--output_dir <directory to store the evaluation results>
|
|
||||||
```
|
|
||||||
|
|
||||||
You can run
|
|
||||||
```
|
|
||||||
llama-stack-client eval run-benchmark help
|
|
||||||
```
|
|
||||||
to see a description of all the flags that `eval run-benchmark` supports.
|
|
||||||
|
|
||||||
|
|
||||||
In the output log, you can find the file path that contains your evaluation results. Open that file to see your aggregate
|
|
||||||
evaluation results.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#### What's Next?
|
|
||||||
|
|
||||||
- Check out our Colab notebook on working examples with running benchmark evaluations [here](https://colab.research.google.com/github/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb#scrollTo=mxLCsP4MvFqP).
|
|
||||||
- Check out our [Building Applications - Evaluation](../building_applications/evals.md) guide for more details on how to use the Evaluation APIs to evaluate your applications.
|
|
||||||
- Check out our [Evaluation Reference](../references/evals_reference/index.md) for more details on the APIs.
|
|
||||||
|
|
@ -1,23 +0,0 @@
|
||||||
# Core Concepts
|
|
||||||
|
|
||||||
Given Llama Stack's service-oriented philosophy, a few concepts and workflows arise which may not feel completely natural in the LLM landscape, especially if you are coming with a background in other frameworks.
|
|
||||||
|
|
||||||
```{include} apis.md
|
|
||||||
:start-after: ## APIs
|
|
||||||
```
|
|
||||||
|
|
||||||
```{include} api_providers.md
|
|
||||||
:start-after: ## API Providers
|
|
||||||
```
|
|
||||||
|
|
||||||
```{include} resources.md
|
|
||||||
:start-after: ## Resources
|
|
||||||
```
|
|
||||||
|
|
||||||
```{include} distributions.md
|
|
||||||
:start-after: ## Distributions
|
|
||||||
```
|
|
||||||
|
|
||||||
```{include} evaluation_concepts.md
|
|
||||||
:start-after: ## Evaluation Concepts
|
|
||||||
```
|
|
||||||
|
|
@ -1,19 +0,0 @@
|
||||||
## Resources
|
|
||||||
|
|
||||||
Some of these APIs are associated with a set of **Resources**. Here is the mapping of APIs to resources:
|
|
||||||
|
|
||||||
- **Inference**, **Eval** and **Post Training** are associated with `Model` resources.
|
|
||||||
- **Safety** is associated with `Shield` resources.
|
|
||||||
- **Tool Runtime** is associated with `ToolGroup` resources.
|
|
||||||
- **DatasetIO** is associated with `Dataset` resources.
|
|
||||||
- **VectorIO** is associated with `VectorDB` resources.
|
|
||||||
- **Scoring** is associated with `ScoringFunction` resources.
|
|
||||||
- **Eval** is associated with `Model` and `Benchmark` resources.
|
|
||||||
|
|
||||||
Furthermore, we allow these resources to be **federated** across multiple providers. For example, you may have some Llama models served by Fireworks while others are served by AWS Bedrock. Regardless, they will all work seamlessly with the same uniform Inference API provided by Llama Stack.
|
|
||||||
|
|
||||||
```{admonition} Registering Resources
|
|
||||||
:class: tip
|
|
||||||
|
|
||||||
Given this architecture, it is necessary for the Stack to know which provider to use for a given resource. This means you need to explicitly _register_ resources (including models) before you can use them with the associated APIs.
|
|
||||||
```
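For example, a minimal sketch of registering a model resource before using it with the Inference API (the provider and model identifiers here are assumptions that depend on your distribution):

```python
# Sketch: tell the Stack which provider serves this model before calling Inference.
client.models.register(
    model_id="meta-llama/Llama-3.3-70B-Instruct",
    provider_id="fireworks",  # assumed provider id from your run config
)
```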
|
|
||||||
|
|
@ -1,143 +0,0 @@
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
|
|
||||||
# Configuration file for the Sphinx documentation builder.
|
|
||||||
#
|
|
||||||
# For the full list of built-in configuration values, see the documentation:
|
|
||||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
|
||||||
|
|
||||||
# -- Project information -----------------------------------------------------
|
|
||||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
|
|
||||||
|
|
||||||
import json
|
|
||||||
from datetime import datetime
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import requests
|
|
||||||
from docutils import nodes
|
|
||||||
|
|
||||||
# Read version from pyproject.toml
|
|
||||||
with Path(__file__).parent.parent.parent.joinpath("pyproject.toml").open("rb") as f:
|
|
||||||
pypi_url = "https://pypi.org/pypi/llama-stack/json"
|
|
||||||
headers = {
|
|
||||||
'User-Agent': 'pip/23.0.1 (python 3.11)', # Mimic pip's user agent
|
|
||||||
'Accept': 'application/json'
|
|
||||||
}
|
|
||||||
version_tag = json.loads(requests.get(pypi_url, headers=headers).text)["info"]["version"]
|
|
||||||
print(f"{version_tag=}")
|
|
||||||
|
|
||||||
# generate the full link including text and url here
|
|
||||||
llama_stack_version_url = (
|
|
||||||
f"https://github.com/meta-llama/llama-stack/releases/tag/v{version_tag}"
|
|
||||||
)
|
|
||||||
llama_stack_version_link = f"<a href='{llama_stack_version_url}'>release notes</a>"
|
|
||||||
|
|
||||||
project = "llama-stack"
|
|
||||||
copyright = f"{datetime.now().year}, Meta"
|
|
||||||
author = "Meta"
|
|
||||||
|
|
||||||
# -- General configuration ---------------------------------------------------
|
|
||||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
|
||||||
|
|
||||||
extensions = [
|
|
||||||
"myst_parser",
|
|
||||||
"sphinx_copybutton",
|
|
||||||
"sphinx_design",
|
|
||||||
"sphinx_rtd_theme",
|
|
||||||
"sphinx_rtd_dark_mode",
|
|
||||||
"sphinx_tabs.tabs",
|
|
||||||
"sphinxcontrib.redoc",
|
|
||||||
"sphinxcontrib.mermaid",
|
|
||||||
"sphinxcontrib.video",
|
|
||||||
]
|
|
||||||
myst_enable_extensions = ["colon_fence"]
|
|
||||||
|
|
||||||
html_theme = "sphinx_rtd_theme"
|
|
||||||
html_use_relative_paths = True
|
|
||||||
templates_path = ["_templates"]
|
|
||||||
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
|
|
||||||
|
|
||||||
myst_enable_extensions = [
|
|
||||||
"amsmath",
|
|
||||||
"attrs_inline",
|
|
||||||
"attrs_block",
|
|
||||||
"colon_fence",
|
|
||||||
"deflist",
|
|
||||||
"dollarmath",
|
|
||||||
"fieldlist",
|
|
||||||
"html_admonition",
|
|
||||||
"html_image",
|
|
||||||
# "linkify",
|
|
||||||
"replacements",
|
|
||||||
"smartquotes",
|
|
||||||
"strikethrough",
|
|
||||||
"substitution",
|
|
||||||
"tasklist",
|
|
||||||
]
|
|
||||||
|
|
||||||
myst_substitutions = {
|
|
||||||
"docker_hub": "https://hub.docker.com/repository/docker/llamastack",
|
|
||||||
"llama_stack_version": version_tag,
|
|
||||||
"llama_stack_version_link": llama_stack_version_link,
|
|
||||||
}
|
|
||||||
|
|
||||||
suppress_warnings = ["myst.header"]
|
|
||||||
|
|
||||||
# Copy button settings
|
|
||||||
copybutton_prompt_text = "$ " # for bash prompts
|
|
||||||
copybutton_prompt_is_regexp = True
|
|
||||||
copybutton_remove_prompts = True
|
|
||||||
copybutton_line_continuation_character = "\\"
|
|
||||||
|
|
||||||
# Source suffix
|
|
||||||
source_suffix = {
|
|
||||||
".rst": "restructuredtext",
|
|
||||||
".md": "markdown",
|
|
||||||
}
|
|
||||||
|
|
||||||
# -- Options for HTML output -------------------------------------------------
|
|
||||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
|
|
||||||
|
|
||||||
# html_theme = "alabaster"
|
|
||||||
html_theme_options = {
|
|
||||||
"canonical_url": "https://github.com/meta-llama/llama-stack",
|
|
||||||
"collapse_navigation": False,
|
|
||||||
# "style_nav_header_background": "#c3c9d4",
|
|
||||||
'display_version': True,
|
|
||||||
'version_selector': True,
|
|
||||||
}
|
|
||||||
|
|
||||||
default_dark_mode = False
|
|
||||||
|
|
||||||
html_static_path = ["../_static"]
|
|
||||||
# html_logo = "../_static/llama-stack-logo.png"
|
|
||||||
# html_style = "../_static/css/my_theme.css"
|
|
||||||
|
|
||||||
|
|
||||||
def setup(app):
|
|
||||||
app.add_css_file("css/my_theme.css")
|
|
||||||
app.add_js_file("js/detect_theme.js")
|
|
||||||
|
|
||||||
def dockerhub_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
|
|
||||||
url = f"https://hub.docker.com/r/llamastack/{text}"
|
|
||||||
node = nodes.reference(rawtext, text, refuri=url, **options)
|
|
||||||
return [node], []
|
|
||||||
|
|
||||||
def repopath_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
|
|
||||||
parts = text.split("::")
|
|
||||||
if len(parts) == 2:
|
|
||||||
link_text = parts[0]
|
|
||||||
url_path = parts[1]
|
|
||||||
else:
|
|
||||||
link_text = text
|
|
||||||
url_path = text
|
|
||||||
|
|
||||||
url = f"https://github.com/meta-llama/llama-stack/tree/main/{url_path}"
|
|
||||||
node = nodes.reference(rawtext, link_text, refuri=url, **options)
|
|
||||||
return [node], []
|
|
||||||
|
|
||||||
app.add_role("dockerhub", dockerhub_role)
|
|
||||||
app.add_role("repopath", repopath_role)
|
|
||||||
|
|
@ -1,14 +0,0 @@
|
||||||
|
|
||||||
```{include} ../../../CONTRIBUTING.md
|
|
||||||
```
|
|
||||||
|
|
||||||
See the [Adding a New API Provider](new_api_provider.md) which describes how to add new API providers to the Stack.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
```{toctree}
|
|
||||||
:maxdepth: 1
|
|
||||||
:hidden:
|
|
||||||
|
|
||||||
new_api_provider
|
|
||||||
```
|
|
||||||
|
|
@ -1,48 +0,0 @@
|
||||||
# Adding a New API Provider
|
|
||||||
|
|
||||||
This guide will walk you through the process of adding a new API provider to Llama Stack.
|
|
||||||
|
|
||||||
|
|
||||||
- Begin by reviewing the [core concepts](../concepts/index.md) of Llama Stack and choose the API your provider belongs to (Inference, Safety, VectorIO, etc.)
|
|
||||||
- Determine the provider type ({repopath}`Remote::llama_stack/providers/remote` or {repopath}`Inline::llama_stack/providers/inline`). Remote providers make requests to external services, while inline providers execute the implementation locally.
|
|
||||||
- Add your provider to the appropriate {repopath}`Registry::llama_stack/providers/registry/`. Specify the necessary pip dependencies.
|
|
||||||
- Update any distribution {repopath}`Templates::llama_stack/templates/` `build.yaml` and `run.yaml` files if they should include your provider by default. Run {repopath}`./scripts/distro_codegen.py` if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation.
|
|
||||||
|
|
||||||
|
|
||||||
Here are some example PRs to help you get started:
|
|
||||||
- [Grok Inference Implementation](https://github.com/meta-llama/llama-stack/pull/609)
|
|
||||||
- [Nvidia Inference Implementation](https://github.com/meta-llama/llama-stack/pull/355)
|
|
||||||
- [Model context protocol Tool Runtime](https://github.com/meta-llama/llama-stack/pull/665)
|
|
||||||
|
|
||||||
|
|
||||||
## Testing the Provider
|
|
||||||
|
|
||||||
Before running tests, you must have required dependencies installed. This depends on the providers or distributions you are testing. For example, if you are testing the `together` distribution, you should install dependencies via `llama stack build --template together`.
|
|
||||||
|
|
||||||
### 1. Integration Testing
|
|
||||||
|
|
||||||
Integration tests are located in {repopath}`tests/integration`. These tests use the python client-SDK APIs (from the `llama_stack_client` package) to test functionality. Since these tests use client APIs, they can be run either by pointing to an instance of the Llama Stack server or "inline" by using `LlamaStackAsLibraryClient`.
|
|
||||||
|
|
||||||
Consult {repopath}`tests/integration/README.md` for more details on how to run the tests.
|
|
||||||
|
|
||||||
Note that each provider's `sample_run_config()` method (in the configuration class for that provider)
|
|
||||||
typically references some environment variables for specifying API keys and the like. You can set these in the environment or pass these via the `--env` flag to the test command.
|
|
||||||
|
|
||||||
|
|
||||||
### 2. Unit Testing
|
|
||||||
|
|
||||||
Unit tests are located in {repopath}`tests/unit`. Provider-specific unit tests are located in {repopath}`tests/unit/providers`. These tests are all run automatically as part of the CI process.
|
|
||||||
|
|
||||||
Consult {repopath}`tests/unit/README.md` for more details on how to run the tests manually.
|
|
||||||
|
|
||||||
### 3. Additional end-to-end testing
|
|
||||||
|
|
||||||
1. Start a Llama Stack server with your new provider
|
|
||||||
2. Verify compatibility with existing client scripts in the [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main) repository
|
|
||||||
3. Document which scripts are compatible with your provider
|
|
||||||
|
|
||||||
## Submitting Your PR
|
|
||||||
|
|
||||||
1. Ensure all tests pass
|
|
||||||
2. Include a comprehensive test plan in your PR summary
|
|
||||||
3. Document any known limitations or considerations
|
|
||||||
|
|
@ -1,6 +0,0 @@
|
||||||
# Testing Llama Stack
|
|
||||||
|
|
||||||
Tests are of three different kinds:
|
|
||||||
- Unit tests
|
|
||||||
- Provider focused integration tests
|
|
||||||
- Client SDK tests
|
|
||||||
|
|
@ -1,420 +0,0 @@
|
||||||
# Build your own Distribution
|
|
||||||
|
|
||||||
|
|
||||||
This guide will walk you through the steps to get started with building a Llama Stack distribution from scratch with your choice of API providers.
|
|
||||||
|
|
||||||
|
|
||||||
### Setting your log level
|
|
||||||
|
|
||||||
To specify the desired logging level, users can set the `LLAMA_STACK_LOGGING` environment variable using the following format:
|
|
||||||
|
|
||||||
`LLAMA_STACK_LOGGING=server=debug;core=info`
|
|
||||||
|
|
||||||
Each of the following categories:
|
|
||||||
|
|
||||||
- all
|
|
||||||
- core
|
|
||||||
- server
|
|
||||||
- router
|
|
||||||
- inference
|
|
||||||
- agents
|
|
||||||
- safety
|
|
||||||
- eval
|
|
||||||
- tools
|
|
||||||
- client
|
|
||||||
|
|
||||||
can be set to any of the following log levels:
|
|
||||||
|
|
||||||
- debug
|
|
||||||
- info
|
|
||||||
- warning
|
|
||||||
- error
|
|
||||||
- critical
|
|
||||||
|
|
||||||
The default global log level is `info`. `all` sets the log level for all components.
|
|
||||||
|
|
||||||
A user can also set `LLAMA_STACK_LOG_FILE` which will pipe the logs to the specified path as well as to the terminal. An example would be: `export LLAMA_STACK_LOG_FILE=server.log`
|
|
||||||
|
|
||||||
### Llama Stack Build
|
|
||||||
|
|
||||||
In order to build your own distribution, we recommend you clone the `llama-stack` repository.
|
|
||||||
|
|
||||||
|
|
||||||
```
|
|
||||||
git clone git@github.com:meta-llama/llama-stack.git
|
|
||||||
cd llama-stack
|
|
||||||
pip install -e .
|
|
||||||
```
|
|
||||||
Use the CLI to build your distribution.
|
|
||||||
The main points to consider are:
|
|
||||||
1. **Image Type** - Do you want a Conda/venv environment or a container (e.g., Docker)?
|
|
||||||
2. **Template** - Do you want to use a template to build your distribution, or start from scratch?
|
|
||||||
3. **Config** - Do you want to use a pre-existing config file to build your distribution?
|
|
||||||
|
|
||||||
```
|
|
||||||
llama stack build -h
|
|
||||||
usage: llama stack build [-h] [--config CONFIG] [--template TEMPLATE] [--list-templates] [--image-type {conda,container,venv}] [--image-name IMAGE_NAME] [--print-deps-only] [--run]
|
|
||||||
|
|
||||||
Build a Llama stack container
|
|
||||||
|
|
||||||
options:
|
|
||||||
-h, --help show this help message and exit
|
|
||||||
--config CONFIG Path to a config file to use for the build. You can find example configs in llama_stack/distributions/**/build.yaml. If this argument is not provided, you will
|
|
||||||
be prompted to enter information interactively (default: None)
|
|
||||||
--template TEMPLATE Name of the example template config to use for build. You may use `llama stack build --list-templates` to check out the available templates (default: None)
|
|
||||||
--list-templates Show the available templates for building a Llama Stack distribution (default: False)
|
|
||||||
--image-type {conda,container,venv}
|
|
||||||
Image Type to use for the build. This can be either conda or container or venv. If not specified, will use the image type from the template config. (default:
|
|
||||||
conda)
|
|
||||||
--image-name IMAGE_NAME
|
|
||||||
[for image-type=conda|container|venv] Name of the conda or virtual environment to use for the build. If not specified, currently active Conda environment will be used if
|
|
||||||
found. (default: None)
|
|
||||||
--print-deps-only Print the dependencies for the stack only, without building the stack (default: False)
|
|
||||||
--run Run the stack after building using the same image type, name, and other applicable arguments (default: False)
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
After this step is complete, a file named `<name>-build.yaml` and template file `<name>-run.yaml` will be generated and saved at the output file path specified at the end of the command.
|
|
||||||
|
|
||||||
::::{tab-set}
|
|
||||||
:::{tab-item} Building from a template
|
|
||||||
To build with alternative API providers, we provide distribution templates for users to get started building a distribution backed by different providers.
|
|
||||||
|
|
||||||
The following command will allow you to see the available templates and their corresponding providers.
|
|
||||||
```
|
|
||||||
llama stack build --list-templates
|
|
||||||
```
|
|
||||||
|
|
||||||
```
|
|
||||||
------------------------------+-----------------------------------------------------------------------------+
|
|
||||||
| Template Name | Description |
|
|
||||||
+------------------------------+-----------------------------------------------------------------------------+
|
|
||||||
| hf-serverless | Use (an external) Hugging Face Inference Endpoint for running LLM inference |
|
|
||||||
+------------------------------+-----------------------------------------------------------------------------+
|
|
||||||
| together | Use Together.AI for running LLM inference |
|
|
||||||
+------------------------------+-----------------------------------------------------------------------------+
|
|
||||||
| vllm-gpu | Use a built-in vLLM engine for running LLM inference |
|
|
||||||
+------------------------------+-----------------------------------------------------------------------------+
|
|
||||||
| experimental-post-training | Experimental template for post training |
|
|
||||||
+------------------------------+-----------------------------------------------------------------------------+
|
|
||||||
| remote-vllm | Use (an external) vLLM server for running LLM inference |
|
|
||||||
+------------------------------+-----------------------------------------------------------------------------+
|
|
||||||
| fireworks | Use Fireworks.AI for running LLM inference |
|
|
||||||
+------------------------------+-----------------------------------------------------------------------------+
|
|
||||||
| tgi | Use (an external) TGI server for running LLM inference |
|
|
||||||
+------------------------------+-----------------------------------------------------------------------------+
|
|
||||||
| bedrock | Use AWS Bedrock for running LLM inference and safety |
|
|
||||||
+------------------------------+-----------------------------------------------------------------------------+
|
|
||||||
| meta-reference-gpu | Use Meta Reference for running LLM inference |
|
|
||||||
+------------------------------+-----------------------------------------------------------------------------+
|
|
||||||
| nvidia | Use NVIDIA NIM for running LLM inference |
|
|
||||||
+------------------------------+-----------------------------------------------------------------------------+
|
|
||||||
| cerebras | Use Cerebras for running LLM inference |
|
|
||||||
+------------------------------+-----------------------------------------------------------------------------+
|
|
||||||
| ollama | Use (an external) Ollama server for running LLM inference |
|
|
||||||
+------------------------------+-----------------------------------------------------------------------------+
|
|
||||||
| hf-endpoint | Use (an external) Hugging Face Inference Endpoint for running LLM inference |
|
|
||||||
+------------------------------+-----------------------------------------------------------------------------+
|
|
||||||
```
|
|
||||||
|
|
||||||
You may then pick a template to build your distribution with providers fitted to your liking.
|
|
||||||
|
|
||||||
For example, to build a distribution with TGI as the inference provider, you can run:
|
|
||||||
```
|
|
||||||
$ llama stack build --template tgi
|
|
||||||
...
|
|
||||||
You can now edit ~/.llama/distributions/llamastack-tgi/tgi-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-tgi/tgi-run.yaml`
|
|
||||||
```
|
|
||||||
:::
|
|
||||||
:::{tab-item} Building from Scratch
|
|
||||||
|
|
||||||
If the provided templates do not fit your use case, you can run `llama stack build`, which launches an interactive wizard that prompts you for the build configuration.
|
|
||||||
|
|
||||||
It is best to start with a template and understand the structure of the config file and the various concepts (APIs, providers, resources, etc.) before starting from scratch.
|
|
||||||
```
|
|
||||||
llama stack build
|
|
||||||
|
|
||||||
> Enter a name for your Llama Stack (e.g. my-local-stack): my-stack
|
|
||||||
> Enter the image type you want your Llama Stack to be built as (container or conda or venv): conda
|
|
||||||
|
|
||||||
Llama Stack is composed of several APIs working together. Let's select
|
|
||||||
the provider types (implementations) you want to use for these APIs.
|
|
||||||
|
|
||||||
Tip: use <TAB> to see options for the providers.
|
|
||||||
|
|
||||||
> Enter provider for API inference: inline::meta-reference
|
|
||||||
> Enter provider for API safety: inline::llama-guard
|
|
||||||
> Enter provider for API agents: inline::meta-reference
|
|
||||||
> Enter provider for API memory: inline::faiss
|
|
||||||
> Enter provider for API datasetio: inline::meta-reference
|
|
||||||
> Enter provider for API scoring: inline::meta-reference
|
|
||||||
> Enter provider for API eval: inline::meta-reference
|
|
||||||
> Enter provider for API telemetry: inline::meta-reference
|
|
||||||
|
|
||||||
> (Optional) Enter a short description for your Llama Stack:
|
|
||||||
|
|
||||||
You can now edit ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml and run `llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml`
|
|
||||||
```
|
|
||||||
:::
|
|
||||||
|
|
||||||
:::{tab-item} Building from a pre-existing build config file
|
|
||||||
- In addition to templates, you may customize the build to your liking by editing config files and building from them with the following command.
|
|
||||||
|
|
||||||
- The config file will have contents like those in `llama_stack/templates/*build.yaml`.
|
|
||||||
|
|
||||||
```
|
|
||||||
$ cat llama_stack/templates/ollama/build.yaml
|
|
||||||
|
|
||||||
name: ollama
|
|
||||||
distribution_spec:
|
|
||||||
description: Like local, but use ollama for running LLM inference
|
|
||||||
providers:
|
|
||||||
inference: remote::ollama
|
|
||||||
memory: inline::faiss
|
|
||||||
safety: inline::llama-guard
|
|
||||||
agents: inline::meta-reference
|
|
||||||
telemetry: inline::meta-reference
|
|
||||||
image_name: ollama
|
|
||||||
image_type: conda
|
|
||||||
|
|
||||||
# If some providers are external, you can specify the path to the implementation
|
|
||||||
external_providers_dir: ~/.llama/providers.d
|
|
||||||
```
|
|
||||||
|
|
||||||
```
|
|
||||||
llama stack build --config llama_stack/templates/ollama/build.yaml
|
|
||||||
```
|
|
||||||
:::
|
|
||||||
|
|
||||||
:::{tab-item} Building with External Providers
|
|
||||||
|
|
||||||
Llama Stack supports external providers that live outside of the main codebase. This allows you to create and maintain your own providers independently or use community-provided providers.
|
|
||||||
|
|
||||||
To build a distribution with external providers, you need to:
|
|
||||||
|
|
||||||
1. Configure the `external_providers_dir` in your build configuration file:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
# Example my-external-stack.yaml with external providers
|
|
||||||
version: '2'
|
|
||||||
distribution_spec:
|
|
||||||
description: Custom distro for CI tests
|
|
||||||
providers:
|
|
||||||
inference:
|
|
||||||
- remote::custom_ollama
|
|
||||||
# Add more providers as needed
|
|
||||||
image_type: container
|
|
||||||
image_name: ci-test
|
|
||||||
# Path to external provider implementations
|
|
||||||
external_providers_dir: ~/.llama/providers.d
|
|
||||||
```
|
|
||||||
|
|
||||||
Here's an example for a custom Ollama provider:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
adapter:
|
|
||||||
adapter_type: custom_ollama
|
|
||||||
pip_packages:
|
|
||||||
- ollama
|
|
||||||
- aiohttp
|
|
||||||
- llama-stack-provider-ollama # This is the provider package
|
|
||||||
config_class: llama_stack_ollama_provider.config.OllamaImplConfig
|
|
||||||
module: llama_stack_ollama_provider
|
|
||||||
api_dependencies: []
|
|
||||||
optional_api_dependencies: []
|
|
||||||
```
|
|
||||||
|
|
||||||
The `pip_packages` section lists the Python packages required by the provider, as well as the
|
|
||||||
provider package itself. The package must be available on PyPI or can be provided from a local
|
|
||||||
directory or a git repository (git must be installed on the build environment).
|
|
||||||
|
|
||||||
2. Build your distribution using the config file:
|
|
||||||
|
|
||||||
```
|
|
||||||
llama stack build --config my-external-stack.yaml
|
|
||||||
```
|
|
||||||
|
|
||||||
For more information on external providers, including directory structure, provider types, and implementation requirements, see the [External Providers documentation](../providers/external.md).
|
|
||||||
:::
|
|
||||||
|
|
||||||
:::{tab-item} Building Container
|
|
||||||
|
|
||||||
```{admonition} Podman Alternative
|
|
||||||
:class: tip
|
|
||||||
|
|
||||||
Podman is supported as an alternative to Docker. Set `CONTAINER_BINARY` to `podman` in your environment to use Podman.
|
|
||||||
```
|
|
||||||
|
|
||||||
To build a container image, you may start off from a template and use the `--image-type container` flag to specify `container` as the build image type.
|
|
||||||
|
|
||||||
```
|
|
||||||
llama stack build --template ollama --image-type container
|
|
||||||
```
|
|
||||||
|
|
||||||
```
|
|
||||||
$ llama stack build --template ollama --image-type container
|
|
||||||
...
|
|
||||||
Containerfile created successfully in /tmp/tmp.viA3a3Rdsg/Containerfile
FROM python:3.10-slim
|
|
||||||
...
|
|
||||||
|
|
||||||
You can now edit ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml and run `llama stack run ~/meta-llama/llama-stack/tmp/configs/ollama-run.yaml`
|
|
||||||
```
|
|
||||||
|
|
||||||
Now set some environment variables for the inference model ID and the Llama Stack port, and create a local directory to mount into the container's file system.
|
|
||||||
```
|
|
||||||
export INFERENCE_MODEL="llama3.2:3b"
|
|
||||||
export LLAMA_STACK_PORT=8321
|
|
||||||
mkdir -p ~/.llama
|
|
||||||
```
|
|
||||||
|
|
||||||
After this step is successful, you should be able to find the built container image and test it with the below Docker command:
|
|
||||||
|
|
||||||
```
|
|
||||||
docker run -d \
|
|
||||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
|
||||||
-v ~/.llama:/root/.llama \
|
|
||||||
localhost/distribution-ollama:dev \
|
|
||||||
--port $LLAMA_STACK_PORT \
|
|
||||||
--env INFERENCE_MODEL=$INFERENCE_MODEL \
|
|
||||||
--env OLLAMA_URL=http://host.docker.internal:11434
|
|
||||||
```
|
|
||||||
|
|
||||||
Here are the docker flags and their uses:
|
|
||||||
|
|
||||||
* `-d`: Runs the container in the detached mode as a background process
|
|
||||||
|
|
||||||
* `-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT`: Maps the container port to the host port for accessing the server
|
|
||||||
|
|
||||||
* `-v ~/.llama:/root/.llama`: Mounts the local .llama directory to persist configurations and data
|
|
||||||
|
|
||||||
* `localhost/distribution-ollama:dev`: The name and tag of the container image to run
|
|
||||||
|
|
||||||
* `--port $LLAMA_STACK_PORT`: Port number for the server to listen on
|
|
||||||
|
|
||||||
* `--env INFERENCE_MODEL=$INFERENCE_MODEL`: Sets the model to use for inference
|
|
||||||
|
|
||||||
* `--env OLLAMA_URL=http://host.docker.internal:11434`: Configures the URL for the Ollama service
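Once the container is running, it helps to confirm that the server answers before moving on. Below is a minimal smoke-test sketch in Python using `requests`; the health route prefix is an assumption (it has varied between releases), so both known variants are tried.

```python
# Quick smoke test: poll the health endpoint of the freshly started container.
# The route prefix has varied between releases, so both known variants are tried.
import os

import requests

port = os.environ.get("LLAMA_STACK_PORT", "8321")
for path in ("/v1/health", "/health"):
    try:
        r = requests.get(f"http://localhost:{port}{path}", timeout=5)
    except requests.ConnectionError:
        print("server not reachable; is the container running and the port mapped?")
        break
    if r.ok:
        print(f"server healthy via {path}: {r.text}")
        break
else:
    print("no health route answered with 2xx; check the container logs")
```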
|
|
||||||
|
|
||||||
:::
|
|
||||||
|
|
||||||
::::
|
|
||||||
|
|
||||||
|
|
||||||
### Running your Stack server
|
|
||||||
Now, let's start the Llama Stack Distribution Server. You will need the YAML configuration file that was written out at the end of the `llama stack build` step.
|
|
||||||
|
|
||||||
```
|
|
||||||
llama stack run -h
|
|
||||||
usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--env KEY=VALUE] [--tls-keyfile TLS_KEYFILE] [--tls-certfile TLS_CERTFILE]
|
|
||||||
[--image-type {conda,container,venv}]
|
|
||||||
config
|
|
||||||
|
|
||||||
Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.
|
|
||||||
|
|
||||||
positional arguments:
|
|
||||||
config Path to config file to use for the run
|
|
||||||
|
|
||||||
options:
|
|
||||||
-h, --help show this help message and exit
|
|
||||||
--port PORT Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321)
|
|
||||||
--image-name IMAGE_NAME
|
|
||||||
Name of the image to run. Defaults to the current environment (default: None)
|
|
||||||
--env KEY=VALUE Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times. (default: [])
|
|
||||||
--tls-keyfile TLS_KEYFILE
|
|
||||||
Path to TLS key file for HTTPS (default: None)
|
|
||||||
--tls-certfile TLS_CERTFILE
|
|
||||||
Path to TLS certificate file for HTTPS (default: None)
|
|
||||||
--image-type {conda,container,venv}
|
|
||||||
Image Type used during the build. This can be either conda or container or venv. (default: conda)
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
```
|
|
||||||
# Start using template name
|
|
||||||
llama stack run tgi
|
|
||||||
|
|
||||||
# Start using config file
|
|
||||||
llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
|
|
||||||
|
|
||||||
# Start using a venv
|
|
||||||
llama stack run --image-type venv ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
|
|
||||||
|
|
||||||
# Start using a conda environment
|
|
||||||
llama stack run --image-type conda ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
|
|
||||||
```
|
|
||||||
|
|
||||||
```
|
|
||||||
$ llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
|
|
||||||
|
|
||||||
Serving API inspect
|
|
||||||
GET /health
|
|
||||||
GET /providers/list
|
|
||||||
GET /routes/list
|
|
||||||
Serving API inference
|
|
||||||
POST /inference/chat_completion
|
|
||||||
POST /inference/completion
|
|
||||||
POST /inference/embeddings
|
|
||||||
...
|
|
||||||
Serving API agents
|
|
||||||
POST /agents/create
|
|
||||||
POST /agents/session/create
|
|
||||||
POST /agents/turn/create
|
|
||||||
POST /agents/delete
|
|
||||||
POST /agents/session/delete
|
|
||||||
POST /agents/session/get
|
|
||||||
POST /agents/step/get
|
|
||||||
POST /agents/turn/get
|
|
||||||
|
|
||||||
Listening on ['::', '0.0.0.0']:8321
|
|
||||||
INFO: Started server process [2935911]
|
|
||||||
INFO: Waiting for application startup.
|
|
||||||
INFO: Application startup complete.
|
|
||||||
INFO: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit)
|
|
||||||
INFO: 2401:db00:35c:2d2b:face:0:c9:0:54678 - "GET /models/list HTTP/1.1" 200 OK
|
|
||||||
```
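With the server up, any of the routes listed above can be exercised from the Python client. The sketch below sends a single chat completion request; it assumes the `llama-stack-client` package is installed and that `INFERENCE_MODEL` names a model registered with the distribution, and the exact method signature may vary by release.

```python
# Minimal chat completion against the running stack server (assumed client API).
import os

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url=f"http://localhost:{os.environ.get('LLAMA_STACK_PORT', '8321')}")

response = client.inference.chat_completion(
    model_id=os.environ["INFERENCE_MODEL"],
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(response)
```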
|
|
||||||
### Listing Distributions
|
|
||||||
Using the list command, you can view all existing Llama Stack distributions, including stacks built from templates, from scratch, or using custom configuration files.
|
|
||||||
|
|
||||||
```
|
|
||||||
llama stack list -h
|
|
||||||
usage: llama stack list [-h]
|
|
||||||
|
|
||||||
list the build stacks
|
|
||||||
|
|
||||||
options:
|
|
||||||
-h, --help show this help message and exit
|
|
||||||
```
|
|
||||||
|
|
||||||
Example Usage
|
|
||||||
|
|
||||||
```
|
|
||||||
llama stack list
|
|
||||||
```
|
|
||||||
|
|
||||||
### Removing a Distribution
|
|
||||||
Use the remove command to delete a distribution you've previously built.
|
|
||||||
|
|
||||||
```
|
|
||||||
llama stack rm -h
|
|
||||||
usage: llama stack rm [-h] [--all] [name]
|
|
||||||
|
|
||||||
Remove the build stack
|
|
||||||
|
|
||||||
positional arguments:
|
|
||||||
name Name of the stack to delete (default: None)
|
|
||||||
|
|
||||||
options:
|
|
||||||
-h, --help show this help message and exit
|
|
||||||
--all, -a Delete all stacks (use with caution) (default: False)
|
|
||||||
```
|
|
||||||
|
|
||||||
Example
|
|
||||||
```
|
|
||||||
llama stack rm llamastack-test
|
|
||||||
```
|
|
||||||
|
|
||||||
To keep your environment organized and avoid clutter, consider using `llama stack list` to review old or unused distributions and `llama stack rm <name>` to delete them when they’re no longer needed.
|
|
||||||
|
|
||||||
### Troubleshooting
|
|
||||||
|
|
||||||
If you encounter any issues, ask questions in our Discord, search through our [GitHub Issues](https://github.com/meta-llama/llama-stack/issues), or file a new issue.
|
|
||||||
|
|
@ -1,401 +0,0 @@
|
||||||
# Configuring a "Stack"
|
|
||||||
|
|
||||||
The Llama Stack runtime configuration is specified as a YAML file. Here is a simplified version of an example configuration file for the Ollama distribution:
|
|
||||||
|
|
||||||
```{dropdown} 👋 Click here for a Sample Configuration File
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
version: 2
|
|
||||||
conda_env: ollama
|
|
||||||
apis:
|
|
||||||
- agents
|
|
||||||
- inference
|
|
||||||
- vector_io
|
|
||||||
- safety
|
|
||||||
- telemetry
|
|
||||||
providers:
|
|
||||||
inference:
|
|
||||||
- provider_id: ollama
|
|
||||||
provider_type: remote::ollama
|
|
||||||
config:
|
|
||||||
url: ${env.OLLAMA_URL:http://localhost:11434}
|
|
||||||
vector_io:
|
|
||||||
- provider_id: faiss
|
|
||||||
provider_type: inline::faiss
|
|
||||||
config:
|
|
||||||
kvstore:
|
|
||||||
type: sqlite
|
|
||||||
namespace: null
|
|
||||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
|
|
||||||
safety:
|
|
||||||
- provider_id: llama-guard
|
|
||||||
provider_type: inline::llama-guard
|
|
||||||
config: {}
|
|
||||||
agents:
|
|
||||||
- provider_id: meta-reference
|
|
||||||
provider_type: inline::meta-reference
|
|
||||||
config:
|
|
||||||
persistence_store:
|
|
||||||
type: sqlite
|
|
||||||
namespace: null
|
|
||||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
|
|
||||||
telemetry:
|
|
||||||
- provider_id: meta-reference
|
|
||||||
provider_type: inline::meta-reference
|
|
||||||
config: {}
|
|
||||||
metadata_store:
|
|
||||||
namespace: null
|
|
||||||
type: sqlite
|
|
||||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
|
|
||||||
models:
|
|
||||||
- metadata: {}
|
|
||||||
model_id: ${env.INFERENCE_MODEL}
|
|
||||||
provider_id: ollama
|
|
||||||
provider_model_id: null
|
|
||||||
shields: []
|
|
||||||
server:
|
|
||||||
port: 8321
|
|
||||||
auth:
|
|
||||||
provider_type: "kubernetes"
|
|
||||||
config:
|
|
||||||
api_server_url: "https://kubernetes.default.svc"
|
|
||||||
ca_cert_path: "/path/to/ca.crt"
|
|
||||||
```
|
|
||||||
|
|
||||||
Let's break this down into the different sections. The first section specifies the set of APIs that the stack server will serve:
|
|
||||||
```yaml
|
|
||||||
apis:
|
|
||||||
- agents
|
|
||||||
- inference
|
|
||||||
- vector_io
|
|
||||||
- safety
|
|
||||||
- telemetry
|
|
||||||
```
|
|
||||||
|
|
||||||
## Providers
|
|
||||||
Next up is the most critical part: the set of providers that the stack will use to serve the above APIs. Consider the `inference` API:
|
|
||||||
```yaml
|
|
||||||
providers:
|
|
||||||
inference:
|
|
||||||
# provider_id is a string you can choose freely
|
|
||||||
- provider_id: ollama
|
|
||||||
# provider_type is a string that specifies the type of provider.
|
|
||||||
# in this case, the provider for inference is ollama and it is run remotely (outside of the distribution)
|
|
||||||
provider_type: remote::ollama
|
|
||||||
# config is a dictionary that contains the configuration for the provider.
|
|
||||||
# in this case, the configuration is the url of the ollama server
|
|
||||||
config:
|
|
||||||
url: ${env.OLLAMA_URL:http://localhost:11434}
|
|
||||||
```
|
|
||||||
A few things to note:
|
|
||||||
- A _provider instance_ is identified with an (id, type, configuration) triplet.
|
|
||||||
- The id is a string you can choose freely.
|
|
||||||
- You can instantiate any number of provider instances of the same type.
|
|
||||||
- The configuration dictionary is provider-specific.
|
|
||||||
- Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server (via docker or via `llama stack run`), you can specify `--env OLLAMA_URL=http://my-server:11434` to override the default value.
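The `${env.VAR:default}` notation is plain string substitution performed when the configuration is loaded. The snippet below is not the stack's actual implementation, only an illustration of the substitution semantics using a hypothetical `expand_env` helper:

```python
# Illustrative only: expand ${env.VAR:default} references in a config value.
import os
import re

_PATTERN = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*):?([^}]*)\}")


def expand_env(value: str) -> str:
    # Replace each ${env.NAME:default} with os.environ["NAME"], falling back to the default.
    return _PATTERN.sub(lambda m: os.environ.get(m.group(1), m.group(2)), value)


print(expand_env("${env.OLLAMA_URL:http://localhost:11434}"))
# -> http://localhost:11434 unless OLLAMA_URL is set (e.g., via --env OLLAMA_URL=http://my-server:11434)
```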
|
|
||||||
|
|
||||||
## Resources
|
|
||||||
|
|
||||||
Finally, let's look at the `models` section:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
models:
|
|
||||||
- metadata: {}
|
|
||||||
model_id: ${env.INFERENCE_MODEL}
|
|
||||||
provider_id: ollama
|
|
||||||
provider_model_id: null
|
|
||||||
```
|
|
||||||
A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we encourage clients to register models before using them, some Stack servers may come up with a list of "already known and available" models.
|
|
||||||
|
|
||||||
What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`.
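If you prefer to register such an alias at runtime rather than in the YAML, the client exposes a registration call. Here is a sketch, assuming the `llama-stack-client` Python package; the parameter names mirror the fields shown above but should be checked against your client version:

```python
# Register "llama3.2:vision-11b" under a friendlier model_id (assumed client API).
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

client.models.register(
    model_id="image_captioning_model",        # identifier used in Stack interactions
    provider_id="ollama",                     # which inference provider serves it
    provider_model_id="llama3.2:vision-11b",  # identifier inside the provider's catalog
)
```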
|
|
||||||
|
|
||||||
## Server Configuration
|
|
||||||
|
|
||||||
The `server` section configures the HTTP server that serves the Llama Stack APIs:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
server:
|
|
||||||
port: 8321 # Port to listen on (default: 8321)
|
|
||||||
tls_certfile: "/path/to/cert.pem" # Optional: Path to TLS certificate for HTTPS
|
|
||||||
tls_keyfile: "/path/to/key.pem" # Optional: Path to TLS key for HTTPS
|
|
||||||
```
|
|
||||||
|
|
||||||
### Authentication Configuration
|
|
||||||
|
|
||||||
The `auth` section configures authentication for the server. When configured, all API requests must include a valid Bearer token in the Authorization header:
|
|
||||||
|
|
||||||
```
|
|
||||||
Authorization: Bearer <token>
|
|
||||||
```
|
|
||||||
|
|
||||||
The server supports multiple authentication providers:
|
|
||||||
|
|
||||||
#### OAuth 2.0/OpenID Connect Provider with Kubernetes
|
|
||||||
|
|
||||||
The Kubernetes cluster must be configured to use a service account for authentication.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
kubectl create namespace llama-stack
|
|
||||||
kubectl create serviceaccount llama-stack-auth -n llama-stack
|
|
||||||
kubectl create rolebinding llama-stack-auth-rolebinding --clusterrole=admin --serviceaccount=llama-stack:llama-stack-auth -n llama-stack
|
|
||||||
kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token
|
|
||||||
```
|
|
||||||
|
|
||||||
Make sure the `kube-apiserver` runs with `--anonymous-auth=true` to allow unauthenticated requests
|
|
||||||
and that the correct RoleBinding is created to allow the service account to access the necessary
|
|
||||||
resources. If that is not the case, you can create a RoleBinding for the service account to access
|
|
||||||
the necessary resources:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
# allow-anonymous-openid.yaml
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRole
|
|
||||||
metadata:
|
|
||||||
name: allow-anonymous-openid
|
|
||||||
rules:
|
|
||||||
- nonResourceURLs: ["/openid/v1/jwks"]
|
|
||||||
verbs: ["get"]
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRoleBinding
|
|
||||||
metadata:
|
|
||||||
name: allow-anonymous-openid
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: ClusterRole
|
|
||||||
name: allow-anonymous-openid
|
|
||||||
subjects:
|
|
||||||
- kind: User
|
|
||||||
name: system:anonymous
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
```
|
|
||||||
|
|
||||||
And then apply the configuration:
|
|
||||||
```bash
|
|
||||||
kubectl apply -f allow-anonymous-openid.yaml
|
|
||||||
```
|
|
||||||
|
|
||||||
The `oauth2_token` provider validates tokens against the Kubernetes API server through its OIDC endpoints:
|
|
||||||
```yaml
|
|
||||||
server:
|
|
||||||
auth:
|
|
||||||
provider_type: "oauth2_token"
|
|
||||||
config:
|
|
||||||
jwks:
|
|
||||||
uri: "https://kubernetes.default.svc"
|
|
||||||
key_recheck_period: 3600
|
|
||||||
tls_cafile: "/path/to/ca.crt"
|
|
||||||
issuer: "https://kubernetes.default.svc"
|
|
||||||
audience: "https://kubernetes.default.svc"
|
|
||||||
```
|
|
||||||
|
|
||||||
To find your cluster's audience, run:
|
|
||||||
```bash
|
|
||||||
kubectl create token default --duration=1h | cut -d. -f2 | base64 -d | jq .aud
|
|
||||||
```
|
|
||||||
|
|
||||||
For the issuer, you can use the OIDC provider's URL:
|
|
||||||
```bash
|
|
||||||
kubectl get --raw /.well-known/openid-configuration | jq .issuer
|
|
||||||
```
|
|
||||||
|
|
||||||
For `tls_cafile`, you can use the CA certificate of the OIDC provider:
|
|
||||||
```bash
|
|
||||||
kubectl config view --minify -o jsonpath='{.clusters[0].cluster.certificate-authority}'
|
|
||||||
```
|
|
||||||
|
|
||||||
The provider extracts user information from the JWT token:
|
|
||||||
- Username from the `sub` claim becomes a role
|
|
||||||
- Kubernetes groups become teams
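These claims can be inspected directly from the token you created earlier. Here is a small sketch that decodes the JWT payload with the standard library only; it performs no signature verification, so use it purely for debugging:

```python
# Decode the payload of a service-account JWT to see the claims the provider uses.
# Debugging aid only: this does NOT verify the token signature.
import base64
import json

token = open("llama-stack-auth-token").read().strip()
payload_b64 = token.split(".")[1]
payload_b64 += "=" * (-len(payload_b64) % 4)  # restore base64 padding
claims = json.loads(base64.urlsafe_b64decode(payload_b64))

print("sub:   ", claims.get("sub"))      # becomes a role
print("groups:", claims.get("groups"))   # become teams
print("aud:   ", claims.get("aud"))      # must match the configured audience
```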
|
|
||||||
|
|
||||||
You can easily validate a request by running:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl -s -L -H "Authorization: Bearer $(cat llama-stack-auth-token)" http://127.0.0.1:8321/v1/providers
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Custom Provider
|
|
||||||
Validates tokens against a custom authentication endpoint:
|
|
||||||
```yaml
|
|
||||||
server:
|
|
||||||
auth:
|
|
||||||
provider_type: "custom"
|
|
||||||
config:
|
|
||||||
endpoint: "https://auth.example.com/validate" # URL of the auth endpoint
|
|
||||||
```
|
|
||||||
|
|
||||||
The custom endpoint receives a POST request with:
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"api_key": "<token>",
|
|
||||||
"request": {
|
|
||||||
"path": "/api/v1/endpoint",
|
|
||||||
"headers": {
|
|
||||||
"content-type": "application/json",
|
|
||||||
"user-agent": "curl/7.64.1"
|
|
||||||
},
|
|
||||||
"params": {
|
|
||||||
"key": ["value"]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
And must respond with:
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"access_attributes": {
|
|
||||||
"roles": ["admin", "user"],
|
|
||||||
"teams": ["ml-team", "nlp-team"],
|
|
||||||
"projects": ["llama-3", "project-x"],
|
|
||||||
"namespaces": ["research"]
|
|
||||||
},
|
|
||||||
"message": "Authentication successful"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
If no access attributes are returned, the token is used as a namespace.
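To make the contract concrete, here is a minimal sketch of such a validation endpoint written with FastAPI (an assumption; any HTTP framework works). The request and response fields mirror the shapes shown above, and the token store is a placeholder:

```python
# Hypothetical custom auth endpoint matching the request/response shapes above.
from fastapi import FastAPI, HTTPException

app = FastAPI()

# Placeholder token store; a real endpoint would consult your identity system.
VALID_TOKENS = {"secret-token": {"roles": ["user"], "teams": ["ml-team"]}}


@app.post("/validate")
async def validate(payload: dict):
    attrs = VALID_TOKENS.get(payload.get("api_key", ""))
    if attrs is None:
        # Reject unknown tokens with an error status.
        raise HTTPException(status_code=401, detail="Invalid token")
    return {
        "access_attributes": {
            "roles": attrs["roles"],
            "teams": attrs["teams"],
        },
        "message": "Authentication successful",
    }
```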
|
|
||||||
|
|
||||||
### Quota Configuration
|
|
||||||
|
|
||||||
The `quota` section allows you to enable server-side request throttling for both
|
|
||||||
authenticated and anonymous clients. This is useful for preventing abuse, enforcing
|
|
||||||
fairness across tenants, and controlling infrastructure costs without requiring
|
|
||||||
client-side rate limiting or external proxies.
|
|
||||||
|
|
||||||
Quotas are disabled by default. When enabled, each client is tracked using either:
|
|
||||||
|
|
||||||
* Their authenticated `client_id` (derived from the Bearer token), or
|
|
||||||
* Their IP address (fallback for anonymous requests)
|
|
||||||
|
|
||||||
Quota state is stored in a SQLite-backed key-value store, and rate limits are applied
|
|
||||||
within a configurable time window (currently only `day` is supported).
|
|
||||||
|
|
||||||
#### Example
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
server:
|
|
||||||
quota:
|
|
||||||
kvstore:
|
|
||||||
type: sqlite
|
|
||||||
db_path: ./quotas.db
|
|
||||||
anonymous_max_requests: 100
|
|
||||||
authenticated_max_requests: 1000
|
|
||||||
period: day
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Configuration Options
|
|
||||||
|
|
||||||
| Field | Description |
|
|
||||||
| ---------------------------- | -------------------------------------------------------------------------- |
|
|
||||||
| `kvstore` | Required. Backend storage config for tracking request counts. |
|
|
||||||
| `kvstore.type` | Must be `"sqlite"` for now. Other backends may be supported in the future. |
|
|
||||||
| `kvstore.db_path` | File path to the SQLite database. |
|
|
||||||
| `anonymous_max_requests` | Max requests per period for unauthenticated clients. |
|
|
||||||
| `authenticated_max_requests` | Max requests per period for authenticated clients. |
|
|
||||||
| `period` | Time window for quota enforcement. Only `"day"` is supported. |
|
|
||||||
|
|
||||||
> Note: if `authenticated_max_requests` is set but no authentication provider is
|
|
||||||
configured, the server will fall back to applying `anonymous_max_requests` to all
|
|
||||||
clients.
|
|
||||||
|
|
||||||
#### Example with Authentication Enabled
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
server:
|
|
||||||
port: 8321
|
|
||||||
auth:
|
|
||||||
provider_type: custom
|
|
||||||
config:
|
|
||||||
endpoint: https://auth.example.com/validate
|
|
||||||
quota:
|
|
||||||
kvstore:
|
|
||||||
type: sqlite
|
|
||||||
db_path: ./quotas.db
|
|
||||||
anonymous_max_requests: 100
|
|
||||||
authenticated_max_requests: 1000
|
|
||||||
period: day
|
|
||||||
```
|
|
||||||
|
|
||||||
If a client exceeds their limit, the server responds with:
|
|
||||||
|
|
||||||
```http
|
|
||||||
HTTP/1.1 429 Too Many Requests
|
|
||||||
Content-Type: application/json
|
|
||||||
|
|
||||||
{
|
|
||||||
"error": {
|
|
||||||
"message": "Quota exceeded"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
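On the client side, the quota error can be treated like any other rate limit. A small sketch using `requests`, assuming the endpoint and port from the examples above; the backoff policy is purely illustrative:

```python
# Retry once after a quota rejection (HTTP 429), purely as an illustration.
import time

import requests


def get_with_quota_retry(url: str, token: str | None = None, wait_seconds: int = 60):
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    response = requests.get(url, headers=headers)
    if response.status_code == 429:
        # Quota exceeded; wait and retry once. Real clients would back off until the window resets.
        time.sleep(wait_seconds)
        response = requests.get(url, headers=headers)
    response.raise_for_status()
    return response.json()


# Example: get_with_quota_retry("http://127.0.0.1:8321/v1/providers", token="...")
```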
|
|
||||||
|
|
||||||
## Extending to handle Safety
|
|
||||||
|
|
||||||
Configuring Safety can be a little involved, so it is instructive to go through an example.
|
|
||||||
|
|
||||||
The Safety API works with the associated Resource called a `Shield`. Providers can support various kinds of Shields. Good examples include the [Llama Guard](https://ai.meta.com/research/publications/llama-guard-llm-based-input-output-safeguard-for-human-ai-conversations/) system-safety models, or [Bedrock Guardrails](https://aws.amazon.com/bedrock/guardrails/).
|
|
||||||
|
|
||||||
To configure a Bedrock Shield, you would need to add:
|
|
||||||
- A Safety API provider instance with type `remote::bedrock`
|
|
||||||
- A Shield resource served by this provider.
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
...
|
|
||||||
providers:
|
|
||||||
safety:
|
|
||||||
- provider_id: bedrock
|
|
||||||
provider_type: remote::bedrock
|
|
||||||
config:
|
|
||||||
aws_access_key_id: ${env.AWS_ACCESS_KEY_ID}
|
|
||||||
aws_secret_access_key: ${env.AWS_SECRET_ACCESS_KEY}
|
|
||||||
...
|
|
||||||
shields:
|
|
||||||
- provider_id: bedrock
|
|
||||||
params:
|
|
||||||
guardrailVersion: ${env.GUARDRAIL_VERSION}
|
|
||||||
provider_shield_id: ${env.GUARDRAIL_ID}
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
The situation is more involved if the Shield needs _Inference_ of an associated model. This is the case with Llama Guard. In that case, you would need to add:
|
|
||||||
- A Safety API provider instance with type `inline::llama-guard`
|
|
||||||
- An Inference API provider instance for serving the model.
|
|
||||||
- A Model resource associated with this provider.
|
|
||||||
- A Shield resource served by the Safety provider.
|
|
||||||
|
|
||||||
The YAML configuration for this setup, assuming you are using vLLM as your inference server, would look like this:
|
|
||||||
```yaml
|
|
||||||
...
|
|
||||||
providers:
|
|
||||||
safety:
|
|
||||||
- provider_id: llama-guard
|
|
||||||
provider_type: inline::llama-guard
|
|
||||||
config: {}
|
|
||||||
inference:
|
|
||||||
# this vLLM server serves the "normal" inference model (e.g., llama3.2:3b)
|
|
||||||
- provider_id: vllm-0
|
|
||||||
provider_type: remote::vllm
|
|
||||||
config:
|
|
||||||
url: ${env.VLLM_URL:http://localhost:8000}
|
|
||||||
# this vLLM server serves the llama-guard model (e.g., llama-guard:3b)
|
|
||||||
- provider_id: vllm-1
|
|
||||||
provider_type: remote::vllm
|
|
||||||
config:
|
|
||||||
url: ${env.SAFETY_VLLM_URL:http://localhost:8001}
|
|
||||||
...
|
|
||||||
models:
|
|
||||||
- metadata: {}
|
|
||||||
model_id: ${env.INFERENCE_MODEL}
|
|
||||||
provider_id: vllm-0
|
|
||||||
provider_model_id: null
|
|
||||||
- metadata: {}
|
|
||||||
model_id: ${env.SAFETY_MODEL}
|
|
||||||
provider_id: vllm-1
|
|
||||||
provider_model_id: null
|
|
||||||
shields:
|
|
||||||
- provider_id: llama-guard
|
|
||||||
shield_id: ${env.SAFETY_MODEL} # Llama Guard shields are identified by the corresponding LlamaGuard model
|
|
||||||
provider_shield_id: null
|
|
||||||
...
|
|
||||||
```
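Once the shield is registered, it can be exercised through the Safety API. The sketch below assumes the `llama-stack-client` Python package and the `run_shield` method name; check the exact signature against your client version:

```python
# Run the Llama Guard shield against a user message (assumed client API).
import os

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

result = client.safety.run_shield(
    shield_id=os.environ["SAFETY_MODEL"],  # shields are identified by the Llama Guard model here
    messages=[{"role": "user", "content": "Hello, can you help me with my homework?"}],
    params={},
)
print(result)
```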
|
|
||||||
|
|
@ -1,36 +0,0 @@
|
||||||
# Using Llama Stack as a Library
|
|
||||||
|
|
||||||
## Setup Llama Stack without a Server
|
|
||||||
If you are planning to use an external service for Inference (even Ollama or TGI counts as external), it is often easier to use Llama Stack as a library.
|
|
||||||
This avoids the overhead of setting up a server.
|
|
||||||
```bash
|
|
||||||
# setup
|
|
||||||
uv pip install llama-stack
|
|
||||||
llama stack build --template ollama --image-type venv
|
|
||||||
```
|
|
||||||
|
|
||||||
```python
|
|
||||||
import os

from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
|
|
||||||
|
|
||||||
client = LlamaStackAsLibraryClient(
|
|
||||||
"ollama",
|
|
||||||
# provider_data is optional, but if you need to pass in any provider specific data, you can do so here.
|
|
||||||
provider_data={"tavily_search_api_key": os.environ["TAVILY_SEARCH_API_KEY"]},
|
|
||||||
)
|
|
||||||
client.initialize()
|
|
||||||
```
|
|
||||||
|
|
||||||
This will parse your config and set up any inline implementations and remote clients needed for your implementation.
|
|
||||||
|
|
||||||
Then, you can access the APIs like `models` and `inference` on the client and call their methods directly:
|
|
||||||
|
|
||||||
```python
|
|
||||||
response = client.models.list()
|
|
||||||
```
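The same pattern works for inference. A short sketch, assuming `INFERENCE_MODEL` points at a model served by the Ollama distribution; the method signature mirrors the HTTP client and may differ slightly between releases:

```python
response = client.inference.chat_completion(
    model_id=os.environ["INFERENCE_MODEL"],
    messages=[{"role": "user", "content": "Write a haiku about local inference."}],
)
print(response)
```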
|
|
||||||
|
|
||||||
If you've created a [custom distribution](https://llama-stack.readthedocs.io/en/latest/distributions/building_distro.html), you can also use the run.yaml configuration file directly:
|
|
||||||
|
|
||||||
```python
|
|
||||||
client = LlamaStackAsLibraryClient(config_path)
|
|
||||||
client.initialize()
|
|
||||||
```
|
|
||||||
|
|
@ -1,18 +0,0 @@
|
||||||
# Distributions Overview
|
|
||||||
|
|
||||||
A distribution is a pre-packaged set of Llama Stack components that can be deployed together.
|
|
||||||
|
|
||||||
This section provides an overview of the distributions available in Llama Stack.
|
|
||||||
|
|
||||||
```{toctree}
|
|
||||||
:maxdepth: 3
|
|
||||||
|
|
||||||
importing_as_library
|
|
||||||
configuration
|
|
||||||
list_of_distributions
|
|
||||||
kubernetes_deployment
|
|
||||||
building_distro
|
|
||||||
on_device_distro
|
|
||||||
remote_hosted_distro
|
|
||||||
self_hosted_distro
|
|
||||||
```
|
|
||||||
|
|
@ -1,32 +0,0 @@
|
||||||
#!/usr/bin/env bash
|
|
||||||
|
|
||||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
||||||
# All rights reserved.
|
|
||||||
#
|
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
|
||||||
# the root directory of this source tree.
|
|
||||||
|
|
||||||
export POSTGRES_USER=${POSTGRES_USER:-llamastack}
|
|
||||||
export POSTGRES_DB=${POSTGRES_DB:-llamastack}
|
|
||||||
export POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-llamastack}
|
|
||||||
|
|
||||||
export INFERENCE_MODEL=${INFERENCE_MODEL:-meta-llama/Llama-3.2-3B-Instruct}
|
|
||||||
export SAFETY_MODEL=${SAFETY_MODEL:-meta-llama/Llama-Guard-3-1B}
|
|
||||||
|
|
||||||
set -euo pipefail
|
|
||||||
set -x
|
|
||||||
|
|
||||||
envsubst < ./vllm-k8s.yaml.template | kubectl apply -f -
|
|
||||||
envsubst < ./vllm-safety-k8s.yaml.template | kubectl apply -f -
|
|
||||||
envsubst < ./postgres-k8s.yaml.template | kubectl apply -f -
|
|
||||||
envsubst < ./chroma-k8s.yaml.template | kubectl apply -f -
|
|
||||||
|
|
||||||
kubectl create configmap llama-stack-config --from-file=stack_run_config.yaml \
|
|
||||||
--dry-run=client -o yaml > stack-configmap.yaml
|
|
||||||
|
|
||||||
kubectl apply -f stack-configmap.yaml
|
|
||||||
|
|
||||||
envsubst < ./stack-k8s.yaml.template | kubectl apply -f -
|
|
||||||
envsubst < ./ingress-k8s.yaml.template | kubectl apply -f -
|
|
||||||
|
|
||||||
envsubst < ./ui-k8s.yaml.template | kubectl apply -f -
|
|
||||||
|
|
@ -1,66 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: PersistentVolumeClaim
|
|
||||||
metadata:
|
|
||||||
name: chromadb-pvc
|
|
||||||
spec:
|
|
||||||
accessModes:
|
|
||||||
- ReadWriteOnce
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
storage: 20Gi
|
|
||||||
---
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: chromadb
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: chromadb
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: chromadb
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: chromadb
|
|
||||||
image: chromadb/chroma:latest
|
|
||||||
ports:
|
|
||||||
- containerPort: 6000
|
|
||||||
env:
|
|
||||||
- name: CHROMA_HOST
|
|
||||||
value: "0.0.0.0"
|
|
||||||
- name: CHROMA_PORT
|
|
||||||
value: "6000"
|
|
||||||
- name: PERSIST_DIRECTORY
|
|
||||||
value: "/chroma/chroma"
|
|
||||||
- name: CHROMA_DB_IMPL
|
|
||||||
value: "duckdb+parquet"
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
memory: "512Mi"
|
|
||||||
cpu: "250m"
|
|
||||||
limits:
|
|
||||||
memory: "2Gi"
|
|
||||||
cpu: "1000m"
|
|
||||||
volumeMounts:
|
|
||||||
- name: chromadb-storage
|
|
||||||
mountPath: /chroma/chroma
|
|
||||||
volumes:
|
|
||||||
- name: chromadb-storage
|
|
||||||
persistentVolumeClaim:
|
|
||||||
claimName: chromadb-pvc
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: chromadb
|
|
||||||
spec:
|
|
||||||
selector:
|
|
||||||
app: chromadb
|
|
||||||
ports:
|
|
||||||
- protocol: TCP
|
|
||||||
port: 6000
|
|
||||||
targetPort: 6000
|
|
||||||
type: ClusterIP
|
|
||||||
|
|
@ -1,17 +0,0 @@
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: llama-stack-service
|
|
||||||
spec:
|
|
||||||
type: LoadBalancer
|
|
||||||
selector:
|
|
||||||
app.kubernetes.io/name: llama-stack
|
|
||||||
ports:
|
|
||||||
- name: llama-stack-api
|
|
||||||
port: 8321
|
|
||||||
targetPort: 8321
|
|
||||||
protocol: TCP
|
|
||||||
- name: llama-stack-ui
|
|
||||||
port: 8322
|
|
||||||
targetPort: 8322
|
|
||||||
protocol: TCP
|
|
||||||